feedstock 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b478a8e9dd24f3ac78e99189c959b247828f7a2980a979cc320a3b6f8c5306d1
4
- data.tar.gz: 957fb142e5abef9289ca92f40ecf6f7425106a40271da4967c80c27f2b69c4eb
3
+ metadata.gz: f1a02c229edb1b2d7c98904d6263aab47cfe5ef4d605c5a3c78ec412c1bb2083
4
+ data.tar.gz: f0c35d3a675eeb01cbbc73952b85f484df631459a1542161d2b198b8c3b1ccf8
5
5
  SHA512:
6
- metadata.gz: '0895e3a795d26151fc74a79107e0873afe8f0d99fe955cc4fffd94342d335075852ae71abbbfc61d47a637fc06e00c73c17f0ad023e7a712e2dd5e015e393554'
7
- data.tar.gz: b4ef2dbf847a910813d4187b71208c50f773613cca75173152ed6f9343b3de220c98917aa074060851c89e72433f14866cf16539cf357725f8f3c5587d406062
6
+ metadata.gz: b31805cfc5c8aedaabf286f2a76b269df4883daf24850121a94edb349a219c1d7177e161df20a434d07b17eed3bd129c7529d94093559fc5520fe19bc0dc2b45
7
+ data.tar.gz: b47eac95bda32a4a5a7a7a4a904d4baeb6b1055a18e39f09d1d2ed38c1c7b0a6ab9d0672db4f614b210d5bb1969afbd3de962292d6ccf750895e00f6a4b13d6c
data/README.md CHANGED
@@ -28,7 +28,7 @@ template but you can use your own, too.
28
28
  The [feeds.inqk.net repository][example] includes an example of how the
29
29
  Feedstock library can be used in practice.
30
30
 
31
- [example]: https://github.com/pyrmont/feeds.inqk.net/tree/4a95a438f8d3a707db7946238181ab76c029ee77/src/input
31
+ [example]: https://github.com/pyrmont/feeds.inqk.net/
32
32
  "An example of using the Feedstock library"
33
33
 
34
34
  ## Installation
@@ -51,17 +51,17 @@ url = "https://example.org"
51
51
 
52
52
  # Define the rules
53
53
  rules = { info: { id: url,
54
- title: "div.title",
55
- updated: "span.date" },
56
- entries: "div.story",
57
- entry: { id: { path: "a",
58
- content: { attribute: "href" } },
59
- title: "h2",
60
- updated: "span.date",
61
- author: "span.byline",
62
- link: { path: "a",
63
- content: { attribute: "href" } },
64
- summary: "div.summary" } }
54
+ title: Feedstock::Extract.new(selector: "div.title"),
55
+ updated: Feedstock::Extract.new(selector: "span.date") },
56
+
57
+ entry: { id: Feedstock::Extract.new(selector: "a", content: { attribute: "href" }),
58
+ title: Feedstock::Extract.new(selector: "h2"),
59
+ updated: Feedstock::Extract.new(selector: "span.date"),
60
+ author: Feedstock::Extract.new(selector: "span.byline"),
61
+ link: Feedstock::Extract.new(selector: "a", content: { attribute: "href" }),
62
+ summary: Feedstock::Extract.new(selector: "div.summary") },
63
+
64
+ entries: Feedstock::Extract.new(selector: "div.story") }
65
65
 
66
66
  # Using the default format and template
67
67
  Feedstock.feed url, rules
data/lib/feedstock/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Feedstock
4
- VERSION = "0.3.0"
4
+ VERSION = "0.4.0"
5
5
  end
data/lib/feedstock.rb CHANGED
@@ -6,14 +6,22 @@ require "open-uri"
6
6
  require "timeliness"
7
7
 
8
8
  module Feedstock
9
+ class Extract < Struct.new("Extract", :selector, :absolute, :content, :processor, :prefix,
10
+ :suffix, :type, :filter, keyword_init: true); end
11
+
9
12
  class << self
10
- def feed(url, rules, format = :html, template_file = "#{__dir__}/../default.xml")
13
+ def data(url, rules, format = :html)
11
14
  page = download_page url, format
12
- rules = normalise_rules rules
13
15
 
14
16
  info = extract_info page, rules
15
17
  entries = extract_entries page, rules
16
18
 
19
+ { info: info, entries: entries }
20
+ end
21
+
22
+ def feed(url, rules, format = :html, template_file = "#{__dir__}/../default.xml")
23
+ info, entries = data(url, rules, format).values_at(:info, :entries)
24
+
17
25
  create_feed info, entries, template_file
18
26
  end
19
27
 
@@ -34,7 +42,7 @@ module Feedstock
34
42
  end
35
43
 
36
44
  private def extract_content(node, rule)
37
- case rule[:content]
45
+ case rule.content
38
46
  in { attribute: attribute }
39
47
  node[attribute]
40
48
  in "inner_html"
@@ -59,12 +67,12 @@ module Feedstock
59
67
  entries = Array.new
60
68
 
61
69
  rules[:entry].each do |name, rule|
62
- if rule[:literal]
63
- static[name.to_s] = rule[:literal]
64
- elsif rule[:repeat]
65
- static[name.to_s] = format_content page.at_css(rule[:path]), rule
70
+ if rule.is_a? String
71
+ static[name.to_s] = rule
72
+ elsif rule.absolute
73
+ static[name.to_s] = format_content page.at_css(rule.selector), rule
66
74
  else
67
- page.css(rule[:path]).each.with_index do |match, i|
75
+ page.css(rule.selector).each.with_index do |match, i|
68
76
  entries[i] = Hash.new if entries[i].nil?
69
77
  entries[i].merge!({ name.to_s => format_content(match, rule) })
70
78
  end
@@ -81,16 +89,18 @@ module Feedstock
81
89
  private def extract_entries_wrapped(page, rules)
82
90
  entries = Array.new
83
91
 
84
- page.css(rules[:entries][:path]).each.with_index do |node, i|
92
+ page.css(rules[:entries].selector).each.with_index do |parent, i|
85
93
  rules[:entry].each do |name, rule|
86
94
  entries[i] = Hash.new if entries[i].nil?
87
95
 
88
- content = if rule[:literal]
89
- rule[:literal]
90
- elsif rule[:repeat]
91
- format_content page.at_css(rule[:path]), rule
96
+ content = if rule.is_a? String
97
+ rule
98
+ elsif rule.absolute
99
+ format_content page.at_css(rule.selector), rule
100
+ elsif rule.selector.empty?
101
+ format_content parent, rule
92
102
  else
93
- format_content node.at_css(rule[:path]), rule
103
+ format_content parent.at_css(rule.selector), rule
94
104
  end
95
105
 
96
106
  entries[i].merge!({ name.to_s => content })
@@ -98,19 +108,19 @@ module Feedstock
98
108
  end
99
109
 
100
110
 
101
- return entries unless rules[:entries][:filter].is_a? Proc
111
+ return entries unless rules[:entries].filter.is_a? Proc
102
112
 
103
- entries.filter(&rules[:entries][:filter])
113
+ entries.filter(&rules[:entries].filter)
104
114
  end
105
115
 
106
116
  private def extract_info(page, rules)
107
117
  info = Hash.new
108
118
 
109
119
  rules[:info].each do |name, rule|
110
- if rule[:literal]
111
- info[name.to_s] = rule[:literal]
120
+ if rule.is_a? String
121
+ info[name.to_s] = rule
112
122
  else
113
- info[name.to_s] = format_content page.at_css(rule[:path]), rule
123
+ info[name.to_s] = format_content page.at_css(rule.selector), rule
114
124
  end
115
125
  end
116
126
 
@@ -124,7 +134,7 @@ module Feedstock
124
134
  processed = process_content text, rule
125
135
  wrapped = wrap_content processed, rule
126
136
 
127
- case rule[:type]
137
+ case rule.type
128
138
  when "cdata"
129
139
  "<![CDATA[#{wrapped}]]>"
130
140
  when "datetime"
@@ -134,34 +144,18 @@ module Feedstock
134
144
  end
135
145
  end
136
146
 
137
- private def normalise_rules(rules)
138
- rules.keys.each do |category|
139
- case category
140
- when :info, :entry
141
- rules[category].each do |name, rule|
142
- rules[category][name] = { :path => rule } unless rule.is_a? Hash
143
- end
144
- when :entries
145
- rule = rules[category]
146
- rules[category] = { :path => rule } unless rule.is_a? Hash
147
- end
148
- end
149
-
150
- rules
151
- end
152
-
153
147
  private def process_content(content, rule)
154
- if rule[:processor]
155
- rule[:processor].call content, rule
148
+ if rule.processor
149
+ rule.processor.call content, rule
156
150
  else
157
151
  content
158
152
  end
159
153
  end
160
154
 
161
155
  private def wrap_content(content, rule)
162
- return content unless rule[:prepend] || rule[:append]
156
+ return content unless (rule.prefix || rule.suffix)
163
157
 
164
- "#{rule[:prepend]}#{content}#{rule[:append]}"
158
+ "#{rule.prefix}#{content}#{rule.suffix}"
165
159
  end
166
160
  end
167
161
  end
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedstock
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Camilleri
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2021-02-06 00:00:00.000000000 Z
10
+ date: 2025-02-21 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: nokogiri
@@ -101,7 +100,6 @@ licenses:
101
100
  metadata:
102
101
  documentation_uri: https://github.com/pyrmont/feedstock/blob/v0.3.0/api.md
103
102
  allowed_push_host: https://rubygems.org
104
- post_install_message:
105
103
  rdoc_options: []
106
104
  require_paths:
107
105
  - lib
@@ -116,8 +114,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
116
114
  - !ruby/object:Gem::Version
117
115
  version: '0'
118
116
  requirements: []
119
- rubygems_version: 3.2.3
120
- signing_key:
117
+ rubygems_version: 3.6.2
121
118
  specification_version: 4
122
119
  summary: A library for creating RSS feeds from webpages
123
120
  test_files: []