feedstock 0.3.0 → 0.4.0

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b478a8e9dd24f3ac78e99189c959b247828f7a2980a979cc320a3b6f8c5306d1
4
- data.tar.gz: 957fb142e5abef9289ca92f40ecf6f7425106a40271da4967c80c27f2b69c4eb
3
+ metadata.gz: f1a02c229edb1b2d7c98904d6263aab47cfe5ef4d605c5a3c78ec412c1bb2083
4
+ data.tar.gz: f0c35d3a675eeb01cbbc73952b85f484df631459a1542161d2b198b8c3b1ccf8
5
5
  SHA512:
6
- metadata.gz: '0895e3a795d26151fc74a79107e0873afe8f0d99fe955cc4fffd94342d335075852ae71abbbfc61d47a637fc06e00c73c17f0ad023e7a712e2dd5e015e393554'
7
- data.tar.gz: b4ef2dbf847a910813d4187b71208c50f773613cca75173152ed6f9343b3de220c98917aa074060851c89e72433f14866cf16539cf357725f8f3c5587d406062
6
+ metadata.gz: b31805cfc5c8aedaabf286f2a76b269df4883daf24850121a94edb349a219c1d7177e161df20a434d07b17eed3bd129c7529d94093559fc5520fe19bc0dc2b45
7
+ data.tar.gz: b47eac95bda32a4a5a7a7a4a904d4baeb6b1055a18e39f09d1d2ed38c1c7b0a6ab9d0672db4f614b210d5bb1969afbd3de962292d6ccf750895e00f6a4b13d6c
data/README.md CHANGED
@@ -28,7 +28,7 @@ template but you can use your own, too.
28
28
  The [feeds.inqk.net repository][example] includes an example of how the
29
29
  Feedstock library can be used in practice.
30
30
 
31
- [example]: https://github.com/pyrmont/feeds.inqk.net/tree/4a95a438f8d3a707db7946238181ab76c029ee77/src/input
31
+ [example]: https://github.com/pyrmont/feeds.inqk.net/
32
32
  "An example of using the Feedstock library"
33
33
 
34
34
  ## Installation
@@ -51,17 +51,17 @@ url = "https://example.org"
51
51
 
52
52
  # Define the rules
53
53
  rules = { info: { id: url,
54
- title: "div.title",
55
- updated: "span.date" },
56
- entries: "div.story",
57
- entry: { id: { path: "a",
58
- content: { attribute: "href" } },
59
- title: "h2",
60
- updated: "span.date",
61
- author: "span.byline",
62
- link: { path: "a",
63
- content: { attribute: "href" } },
64
- summary: "div.summary" } }
54
+ title: Feedstock::Extract.new(selector: "div.title"),
55
+ updated: Feedstock::Extract.new(selector: "span.date") },
56
+
57
+ entry: { id: Feedstock::Extract.new(selector: "a", content: { attribute: "href" }),
58
+ title: Feedstock::Extract.new(selector: "h2"),
59
+ updated: Feedstock::Extract.new(selector: "span.date"),
60
+ author: Feedstock::Extract.new(selector: "span.byline"),
61
+ link: Feedstock::Extract.new(selector: "a", content: { attribute: "href" }),
62
+ summary: Feedstock::Extract.new(selector: "div.summary") },
63
+
64
+ entries: Feedstock::Extract.new(selector: "div.story") }
65
65
 
66
66
  # Using the default format and template
67
67
  Feedstock.feed url, rules
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Feedstock
4
- VERSION = "0.3.0"
4
+ VERSION = "0.4.0"
5
5
  end
data/lib/feedstock.rb CHANGED
@@ -6,14 +6,22 @@ require "open-uri"
6
6
  require "timeliness"
7
7
 
8
8
  module Feedstock
9
+ class Extract < Struct.new("Extract", :selector, :absolute, :content, :processor, :prefix,
10
+ :suffix, :type, :filter, keyword_init: true); end
11
+
9
12
  class << self
10
- def feed(url, rules, format = :html, template_file = "#{__dir__}/../default.xml")
13
+ def data(url, rules, format = :html)
11
14
  page = download_page url, format
12
- rules = normalise_rules rules
13
15
 
14
16
  info = extract_info page, rules
15
17
  entries = extract_entries page, rules
16
18
 
19
+ { info: info, entries: entries }
20
+ end
21
+
22
+ def feed(url, rules, format = :html, template_file = "#{__dir__}/../default.xml")
23
+ info, entries = data(url, rules, format).values_at(:info, :entries)
24
+
17
25
  create_feed info, entries, template_file
18
26
  end
19
27
 
@@ -34,7 +42,7 @@ module Feedstock
34
42
  end
35
43
 
36
44
  private def extract_content(node, rule)
37
- case rule[:content]
45
+ case rule.content
38
46
  in { attribute: attribute }
39
47
  node[attribute]
40
48
  in "inner_html"
@@ -59,12 +67,12 @@ module Feedstock
59
67
  entries = Array.new
60
68
 
61
69
  rules[:entry].each do |name, rule|
62
- if rule[:literal]
63
- static[name.to_s] = rule[:literal]
64
- elsif rule[:repeat]
65
- static[name.to_s] = format_content page.at_css(rule[:path]), rule
70
+ if rule.is_a? String
71
+ static[name.to_s] = rule
72
+ elsif rule.absolute
73
+ static[name.to_s] = format_content page.at_css(rule.selector), rule
66
74
  else
67
- page.css(rule[:path]).each.with_index do |match, i|
75
+ page.css(rule.selector).each.with_index do |match, i|
68
76
  entries[i] = Hash.new if entries[i].nil?
69
77
  entries[i].merge!({ name.to_s => format_content(match, rule) })
70
78
  end
@@ -81,16 +89,18 @@ module Feedstock
81
89
  private def extract_entries_wrapped(page, rules)
82
90
  entries = Array.new
83
91
 
84
- page.css(rules[:entries][:path]).each.with_index do |node, i|
92
+ page.css(rules[:entries].selector).each.with_index do |parent, i|
85
93
  rules[:entry].each do |name, rule|
86
94
  entries[i] = Hash.new if entries[i].nil?
87
95
 
88
- content = if rule[:literal]
89
- rule[:literal]
90
- elsif rule[:repeat]
91
- format_content page.at_css(rule[:path]), rule
96
+ content = if rule.is_a? String
97
+ rule
98
+ elsif rule.absolute
99
+ format_content page.at_css(rule.selector), rule
100
+ elsif rule.selector.empty?
101
+ format_content parent, rule
92
102
  else
93
- format_content node.at_css(rule[:path]), rule
103
+ format_content parent.at_css(rule.selector), rule
94
104
  end
95
105
 
96
106
  entries[i].merge!({ name.to_s => content })
@@ -98,19 +108,19 @@ module Feedstock
98
108
  end
99
109
 
100
110
 
101
- return entries unless rules[:entries][:filter].is_a? Proc
111
+ return entries unless rules[:entries].filter.is_a? Proc
102
112
 
103
- entries.filter(&rules[:entries][:filter])
113
+ entries.filter(&rules[:entries].filter)
104
114
  end
105
115
 
106
116
  private def extract_info(page, rules)
107
117
  info = Hash.new
108
118
 
109
119
  rules[:info].each do |name, rule|
110
- if rule[:literal]
111
- info[name.to_s] = rule[:literal]
120
+ if rule.is_a? String
121
+ info[name.to_s] = rule
112
122
  else
113
- info[name.to_s] = format_content page.at_css(rule[:path]), rule
123
+ info[name.to_s] = format_content page.at_css(rule.selector), rule
114
124
  end
115
125
  end
116
126
 
@@ -124,7 +134,7 @@ module Feedstock
124
134
  processed = process_content text, rule
125
135
  wrapped = wrap_content processed, rule
126
136
 
127
- case rule[:type]
137
+ case rule.type
128
138
  when "cdata"
129
139
  "<![CDATA[#{wrapped}]]>"
130
140
  when "datetime"
@@ -134,34 +144,18 @@ module Feedstock
134
144
  end
135
145
  end
136
146
 
137
- private def normalise_rules(rules)
138
- rules.keys.each do |category|
139
- case category
140
- when :info, :entry
141
- rules[category].each do |name, rule|
142
- rules[category][name] = { :path => rule } unless rule.is_a? Hash
143
- end
144
- when :entries
145
- rule = rules[category]
146
- rules[category] = { :path => rule } unless rule.is_a? Hash
147
- end
148
- end
149
-
150
- rules
151
- end
152
-
153
147
  private def process_content(content, rule)
154
- if rule[:processor]
155
- rule[:processor].call content, rule
148
+ if rule.processor
149
+ rule.processor.call content, rule
156
150
  else
157
151
  content
158
152
  end
159
153
  end
160
154
 
161
155
  private def wrap_content(content, rule)
162
- return content unless rule[:prepend] || rule[:append]
156
+ return content unless (rule.prefix || rule.suffix)
163
157
 
164
- "#{rule[:prepend]}#{content}#{rule[:append]}"
158
+ "#{rule.prefix}#{content}#{rule.suffix}"
165
159
  end
166
160
  end
167
161
  end
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedstock
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Camilleri
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2021-02-06 00:00:00.000000000 Z
10
+ date: 2025-02-21 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: nokogiri
@@ -101,7 +100,6 @@ licenses:
101
100
  metadata:
102
101
  documentation_uri: https://github.com/pyrmont/feedstock/blob/v0.3.0/api.md
103
102
  allowed_push_host: https://rubygems.org
104
- post_install_message:
105
103
  rdoc_options: []
106
104
  require_paths:
107
105
  - lib
@@ -116,8 +114,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
116
114
  - !ruby/object:Gem::Version
117
115
  version: '0'
118
116
  requirements: []
119
- rubygems_version: 3.2.3
120
- signing_key:
117
+ rubygems_version: 3.6.2
121
118
  specification_version: 4
122
119
  summary: A library for creating RSS feeds from webpages
123
120
  test_files: []