feedstock 0.3.0 → 0.4.0
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -12
- data/lib/feedstock/version.rb +1 -1
- data/lib/feedstock.rb +34 -40
- metadata +3 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f1a02c229edb1b2d7c98904d6263aab47cfe5ef4d605c5a3c78ec412c1bb2083
|
4
|
+
data.tar.gz: f0c35d3a675eeb01cbbc73952b85f484df631459a1542161d2b198b8c3b1ccf8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b31805cfc5c8aedaabf286f2a76b269df4883daf24850121a94edb349a219c1d7177e161df20a434d07b17eed3bd129c7529d94093559fc5520fe19bc0dc2b45
|
7
|
+
data.tar.gz: b47eac95bda32a4a5a7a7a4a904d4baeb6b1055a18e39f09d1d2ed38c1c7b0a6ab9d0672db4f614b210d5bb1969afbd3de962292d6ccf750895e00f6a4b13d6c
|
data/README.md
CHANGED
@@ -28,7 +28,7 @@ template but you can use your own, too.
|
|
28
28
|
The [feeds.inqk.net repository][example] includes an example of how the
|
29
29
|
Feedstock library can be used in practice.
|
30
30
|
|
31
|
-
[example]: https://github.com/pyrmont/feeds.inqk.net/
|
31
|
+
[example]: https://github.com/pyrmont/feeds.inqk.net/
|
32
32
|
"An example of using the Feedstock library"
|
33
33
|
|
34
34
|
## Installation
|
@@ -51,17 +51,17 @@ url = "https://example.org"
|
|
51
51
|
|
52
52
|
# Define the rules
|
53
53
|
rules = { info: { id: url,
|
54
|
-
title: "div.title",
|
55
|
-
updated: "span.date" },
|
56
|
-
|
57
|
-
entry: { id: {
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
54
|
+
title: Feedstock::Extract.new(selector: "div.title"),
|
55
|
+
updated: Feedstock::Extract.new(selector: "span.date") },
|
56
|
+
|
57
|
+
entry: { id: Feedstock::Extract.new(selector: "a", content: { attribute: "href" }),
|
58
|
+
title: Feedstock::Extract.new(selector: "h2"),
|
59
|
+
updated: Feedstock::Extract.new(selector: "span.date"),
|
60
|
+
author: Feedstock::Extract.new(selector: "span.byline"),
|
61
|
+
link: Feedstock::Extract.new(selector: "a", content: { attribute: "href" }),
|
62
|
+
summary: Feedstock::Extract.new(selector: "div.summary") },
|
63
|
+
|
64
|
+
entries: Feedstock::Extract.new(selector: "div.story") }
|
65
65
|
|
66
66
|
# Using the default format and template
|
67
67
|
Feedstock.feed url, rules
|
data/lib/feedstock/version.rb
CHANGED
data/lib/feedstock.rb
CHANGED
@@ -6,14 +6,22 @@ require "open-uri"
|
|
6
6
|
require "timeliness"
|
7
7
|
|
8
8
|
module Feedstock
|
9
|
+
class Extract < Struct.new("Extract", :selector, :absolute, :content, :processor, :prefix,
|
10
|
+
:suffix, :type, :filter, keyword_init: true); end
|
11
|
+
|
9
12
|
class << self
|
10
|
-
def
|
13
|
+
def data(url, rules, format = :html)
|
11
14
|
page = download_page url, format
|
12
|
-
rules = normalise_rules rules
|
13
15
|
|
14
16
|
info = extract_info page, rules
|
15
17
|
entries = extract_entries page, rules
|
16
18
|
|
19
|
+
{ info: info, entries: entries }
|
20
|
+
end
|
21
|
+
|
22
|
+
def feed(url, rules, format = :html, template_file = "#{__dir__}/../default.xml")
|
23
|
+
info, entries = data(url, rules, format).values_at(:info, :entries)
|
24
|
+
|
17
25
|
create_feed info, entries, template_file
|
18
26
|
end
|
19
27
|
|
@@ -34,7 +42,7 @@ module Feedstock
|
|
34
42
|
end
|
35
43
|
|
36
44
|
private def extract_content(node, rule)
|
37
|
-
case rule
|
45
|
+
case rule.content
|
38
46
|
in { attribute: attribute }
|
39
47
|
node[attribute]
|
40
48
|
in "inner_html"
|
@@ -59,12 +67,12 @@ module Feedstock
|
|
59
67
|
entries = Array.new
|
60
68
|
|
61
69
|
rules[:entry].each do |name, rule|
|
62
|
-
if rule
|
63
|
-
static[name.to_s] = rule
|
64
|
-
elsif rule
|
65
|
-
static[name.to_s] = format_content page.at_css(rule
|
70
|
+
if rule.is_a? String
|
71
|
+
static[name.to_s] = rule
|
72
|
+
elsif rule.absolute
|
73
|
+
static[name.to_s] = format_content page.at_css(rule.selector), rule
|
66
74
|
else
|
67
|
-
page.css(rule
|
75
|
+
page.css(rule.selector).each.with_index do |match, i|
|
68
76
|
entries[i] = Hash.new if entries[i].nil?
|
69
77
|
entries[i].merge!({ name.to_s => format_content(match, rule) })
|
70
78
|
end
|
@@ -81,16 +89,18 @@ module Feedstock
|
|
81
89
|
private def extract_entries_wrapped(page, rules)
|
82
90
|
entries = Array.new
|
83
91
|
|
84
|
-
page.css(rules[:entries]
|
92
|
+
page.css(rules[:entries].selector).each.with_index do |parent, i|
|
85
93
|
rules[:entry].each do |name, rule|
|
86
94
|
entries[i] = Hash.new if entries[i].nil?
|
87
95
|
|
88
|
-
content = if rule
|
89
|
-
rule
|
90
|
-
elsif rule
|
91
|
-
format_content page.at_css(rule
|
96
|
+
content = if rule.is_a? String
|
97
|
+
rule
|
98
|
+
elsif rule.absolute
|
99
|
+
format_content page.at_css(rule.selector), rule
|
100
|
+
elsif rule.selector.empty?
|
101
|
+
format_content parent, rule
|
92
102
|
else
|
93
|
-
format_content
|
103
|
+
format_content parent.at_css(rule.selector), rule
|
94
104
|
end
|
95
105
|
|
96
106
|
entries[i].merge!({ name.to_s => content })
|
@@ -98,19 +108,19 @@ module Feedstock
|
|
98
108
|
end
|
99
109
|
|
100
110
|
|
101
|
-
return entries unless rules[:entries]
|
111
|
+
return entries unless rules[:entries].filter.is_a? Proc
|
102
112
|
|
103
|
-
entries.filter(&rules[:entries]
|
113
|
+
entries.filter(&rules[:entries].filter)
|
104
114
|
end
|
105
115
|
|
106
116
|
private def extract_info(page, rules)
|
107
117
|
info = Hash.new
|
108
118
|
|
109
119
|
rules[:info].each do |name, rule|
|
110
|
-
if rule
|
111
|
-
info[name.to_s] = rule
|
120
|
+
if rule.is_a? String
|
121
|
+
info[name.to_s] = rule
|
112
122
|
else
|
113
|
-
info[name.to_s] = format_content page.at_css(rule
|
123
|
+
info[name.to_s] = format_content page.at_css(rule.selector), rule
|
114
124
|
end
|
115
125
|
end
|
116
126
|
|
@@ -124,7 +134,7 @@ module Feedstock
|
|
124
134
|
processed = process_content text, rule
|
125
135
|
wrapped = wrap_content processed, rule
|
126
136
|
|
127
|
-
case rule
|
137
|
+
case rule.type
|
128
138
|
when "cdata"
|
129
139
|
"<![CDATA[#{wrapped}]]>"
|
130
140
|
when "datetime"
|
@@ -134,34 +144,18 @@ module Feedstock
|
|
134
144
|
end
|
135
145
|
end
|
136
146
|
|
137
|
-
private def normalise_rules(rules)
|
138
|
-
rules.keys.each do |category|
|
139
|
-
case category
|
140
|
-
when :info, :entry
|
141
|
-
rules[category].each do |name, rule|
|
142
|
-
rules[category][name] = { :path => rule } unless rule.is_a? Hash
|
143
|
-
end
|
144
|
-
when :entries
|
145
|
-
rule = rules[category]
|
146
|
-
rules[category] = { :path => rule } unless rule.is_a? Hash
|
147
|
-
end
|
148
|
-
end
|
149
|
-
|
150
|
-
rules
|
151
|
-
end
|
152
|
-
|
153
147
|
private def process_content(content, rule)
|
154
|
-
if rule
|
155
|
-
rule
|
148
|
+
if rule.processor
|
149
|
+
rule.processor.call content, rule
|
156
150
|
else
|
157
151
|
content
|
158
152
|
end
|
159
153
|
end
|
160
154
|
|
161
155
|
private def wrap_content(content, rule)
|
162
|
-
return content unless rule
|
156
|
+
return content unless (rule.prefix || rule.suffix)
|
163
157
|
|
164
|
-
"#{rule
|
158
|
+
"#{rule.prefix}#{content}#{rule.suffix}"
|
165
159
|
end
|
166
160
|
end
|
167
161
|
end
|
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feedstock
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Camilleri
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 2025-02-21 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: nokogiri
|
@@ -101,7 +100,6 @@ licenses:
|
|
101
100
|
metadata:
|
102
101
|
documentation_uri: https://github.com/pyrmont/feedstock/blob/v0.3.0/api.md
|
103
102
|
allowed_push_host: https://rubygems.org
|
104
|
-
post_install_message:
|
105
103
|
rdoc_options: []
|
106
104
|
require_paths:
|
107
105
|
- lib
|
@@ -116,8 +114,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
116
114
|
- !ruby/object:Gem::Version
|
117
115
|
version: '0'
|
118
116
|
requirements: []
|
119
|
-
rubygems_version: 3.2
|
120
|
-
signing_key:
|
117
|
+
rubygems_version: 3.6.2
|
121
118
|
specification_version: 4
|
122
119
|
summary: A library for creating RSS feeds from webpages
|
123
120
|
test_files: []
|