feedstock 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +12 -12
- data/lib/feedstock/version.rb +1 -1
- data/lib/feedstock.rb +34 -40
- metadata +3 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f1a02c229edb1b2d7c98904d6263aab47cfe5ef4d605c5a3c78ec412c1bb2083
|
4
|
+
data.tar.gz: f0c35d3a675eeb01cbbc73952b85f484df631459a1542161d2b198b8c3b1ccf8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b31805cfc5c8aedaabf286f2a76b269df4883daf24850121a94edb349a219c1d7177e161df20a434d07b17eed3bd129c7529d94093559fc5520fe19bc0dc2b45
|
7
|
+
data.tar.gz: b47eac95bda32a4a5a7a7a4a904d4baeb6b1055a18e39f09d1d2ed38c1c7b0a6ab9d0672db4f614b210d5bb1969afbd3de962292d6ccf750895e00f6a4b13d6c
|
data/README.md
CHANGED
@@ -28,7 +28,7 @@ template but you can use your own, too.
|
|
28
28
|
The [feeds.inqk.net repository][example] includes an example of how the
|
29
29
|
Feedstock library can be used in practice.
|
30
30
|
|
31
|
-
[example]: https://github.com/pyrmont/feeds.inqk.net/
|
31
|
+
[example]: https://github.com/pyrmont/feeds.inqk.net/
|
32
32
|
"An example of using the Feedstock library"
|
33
33
|
|
34
34
|
## Installation
|
@@ -51,17 +51,17 @@ url = "https://example.org"
|
|
51
51
|
|
52
52
|
# Define the rules
|
53
53
|
rules = { info: { id: url,
|
54
|
-
title: "div.title",
|
55
|
-
updated: "span.date" },
|
56
|
-
|
57
|
-
entry: { id: {
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
54
|
+
title: Feedstock::Extract.new(selector: "div.title"),
|
55
|
+
updated: Feedstock::Extract.new(selector: "span.date") },
|
56
|
+
|
57
|
+
entry: { id: Feedstock::Extract.new(selector: "a", content: { attribute: "href" }),
|
58
|
+
title: Feedstock::Extract.new(selector: "h2"),
|
59
|
+
updated: Feedstock::Extract.new(selector: "span.date"),
|
60
|
+
author: Feedstock::Extract.new(selector: "span.byline"),
|
61
|
+
link: Feedstock::Extract.new(selector: "a", content: { attribute: "href" }),
|
62
|
+
summary: Feedstock::Extract.new(selector: "div.summary") },
|
63
|
+
|
64
|
+
entries: Feedstock::Extract.new(selector: "div.story") }
|
65
65
|
|
66
66
|
# Using the default format and template
|
67
67
|
Feedstock.feed url, rules
|
data/lib/feedstock/version.rb
CHANGED
data/lib/feedstock.rb
CHANGED
@@ -6,14 +6,22 @@ require "open-uri"
|
|
6
6
|
require "timeliness"
|
7
7
|
|
8
8
|
module Feedstock
|
9
|
+
class Extract < Struct.new("Extract", :selector, :absolute, :content, :processor, :prefix,
|
10
|
+
:suffix, :type, :filter, keyword_init: true); end
|
11
|
+
|
9
12
|
class << self
|
10
|
-
def
|
13
|
+
def data(url, rules, format = :html)
|
11
14
|
page = download_page url, format
|
12
|
-
rules = normalise_rules rules
|
13
15
|
|
14
16
|
info = extract_info page, rules
|
15
17
|
entries = extract_entries page, rules
|
16
18
|
|
19
|
+
{ info: info, entries: entries }
|
20
|
+
end
|
21
|
+
|
22
|
+
def feed(url, rules, format = :html, template_file = "#{__dir__}/../default.xml")
|
23
|
+
info, entries = data(url, rules, format).values_at(:info, :entries)
|
24
|
+
|
17
25
|
create_feed info, entries, template_file
|
18
26
|
end
|
19
27
|
|
@@ -34,7 +42,7 @@ module Feedstock
|
|
34
42
|
end
|
35
43
|
|
36
44
|
private def extract_content(node, rule)
|
37
|
-
case rule
|
45
|
+
case rule.content
|
38
46
|
in { attribute: attribute }
|
39
47
|
node[attribute]
|
40
48
|
in "inner_html"
|
@@ -59,12 +67,12 @@ module Feedstock
|
|
59
67
|
entries = Array.new
|
60
68
|
|
61
69
|
rules[:entry].each do |name, rule|
|
62
|
-
if rule
|
63
|
-
static[name.to_s] = rule
|
64
|
-
elsif rule
|
65
|
-
static[name.to_s] = format_content page.at_css(rule
|
70
|
+
if rule.is_a? String
|
71
|
+
static[name.to_s] = rule
|
72
|
+
elsif rule.absolute
|
73
|
+
static[name.to_s] = format_content page.at_css(rule.selector), rule
|
66
74
|
else
|
67
|
-
page.css(rule
|
75
|
+
page.css(rule.selector).each.with_index do |match, i|
|
68
76
|
entries[i] = Hash.new if entries[i].nil?
|
69
77
|
entries[i].merge!({ name.to_s => format_content(match, rule) })
|
70
78
|
end
|
@@ -81,16 +89,18 @@ module Feedstock
|
|
81
89
|
private def extract_entries_wrapped(page, rules)
|
82
90
|
entries = Array.new
|
83
91
|
|
84
|
-
page.css(rules[:entries]
|
92
|
+
page.css(rules[:entries].selector).each.with_index do |parent, i|
|
85
93
|
rules[:entry].each do |name, rule|
|
86
94
|
entries[i] = Hash.new if entries[i].nil?
|
87
95
|
|
88
|
-
content = if rule
|
89
|
-
rule
|
90
|
-
elsif rule
|
91
|
-
format_content page.at_css(rule
|
96
|
+
content = if rule.is_a? String
|
97
|
+
rule
|
98
|
+
elsif rule.absolute
|
99
|
+
format_content page.at_css(rule.selector), rule
|
100
|
+
elsif rule.selector.empty?
|
101
|
+
format_content parent, rule
|
92
102
|
else
|
93
|
-
format_content
|
103
|
+
format_content parent.at_css(rule.selector), rule
|
94
104
|
end
|
95
105
|
|
96
106
|
entries[i].merge!({ name.to_s => content })
|
@@ -98,19 +108,19 @@ module Feedstock
|
|
98
108
|
end
|
99
109
|
|
100
110
|
|
101
|
-
return entries unless rules[:entries]
|
111
|
+
return entries unless rules[:entries].filter.is_a? Proc
|
102
112
|
|
103
|
-
entries.filter(&rules[:entries]
|
113
|
+
entries.filter(&rules[:entries].filter)
|
104
114
|
end
|
105
115
|
|
106
116
|
private def extract_info(page, rules)
|
107
117
|
info = Hash.new
|
108
118
|
|
109
119
|
rules[:info].each do |name, rule|
|
110
|
-
if rule
|
111
|
-
info[name.to_s] = rule
|
120
|
+
if rule.is_a? String
|
121
|
+
info[name.to_s] = rule
|
112
122
|
else
|
113
|
-
info[name.to_s] = format_content page.at_css(rule
|
123
|
+
info[name.to_s] = format_content page.at_css(rule.selector), rule
|
114
124
|
end
|
115
125
|
end
|
116
126
|
|
@@ -124,7 +134,7 @@ module Feedstock
|
|
124
134
|
processed = process_content text, rule
|
125
135
|
wrapped = wrap_content processed, rule
|
126
136
|
|
127
|
-
case rule
|
137
|
+
case rule.type
|
128
138
|
when "cdata"
|
129
139
|
"<![CDATA[#{wrapped}]]>"
|
130
140
|
when "datetime"
|
@@ -134,34 +144,18 @@ module Feedstock
|
|
134
144
|
end
|
135
145
|
end
|
136
146
|
|
137
|
-
private def normalise_rules(rules)
|
138
|
-
rules.keys.each do |category|
|
139
|
-
case category
|
140
|
-
when :info, :entry
|
141
|
-
rules[category].each do |name, rule|
|
142
|
-
rules[category][name] = { :path => rule } unless rule.is_a? Hash
|
143
|
-
end
|
144
|
-
when :entries
|
145
|
-
rule = rules[category]
|
146
|
-
rules[category] = { :path => rule } unless rule.is_a? Hash
|
147
|
-
end
|
148
|
-
end
|
149
|
-
|
150
|
-
rules
|
151
|
-
end
|
152
|
-
|
153
147
|
private def process_content(content, rule)
|
154
|
-
if rule
|
155
|
-
rule
|
148
|
+
if rule.processor
|
149
|
+
rule.processor.call content, rule
|
156
150
|
else
|
157
151
|
content
|
158
152
|
end
|
159
153
|
end
|
160
154
|
|
161
155
|
private def wrap_content(content, rule)
|
162
|
-
return content unless rule
|
156
|
+
return content unless (rule.prefix || rule.suffix)
|
163
157
|
|
164
|
-
"#{rule
|
158
|
+
"#{rule.prefix}#{content}#{rule.suffix}"
|
165
159
|
end
|
166
160
|
end
|
167
161
|
end
|
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feedstock
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Camilleri
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 2025-02-21 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: nokogiri
|
@@ -101,7 +100,6 @@ licenses:
|
|
101
100
|
metadata:
|
102
101
|
documentation_uri: https://github.com/pyrmont/feedstock/blob/v0.3.0/api.md
|
103
102
|
allowed_push_host: https://rubygems.org
|
104
|
-
post_install_message:
|
105
103
|
rdoc_options: []
|
106
104
|
require_paths:
|
107
105
|
- lib
|
@@ -116,8 +114,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
116
114
|
- !ruby/object:Gem::Version
|
117
115
|
version: '0'
|
118
116
|
requirements: []
|
119
|
-
rubygems_version: 3.2
|
120
|
-
signing_key:
|
117
|
+
rubygems_version: 3.6.2
|
121
118
|
specification_version: 4
|
122
119
|
summary: A library for creating RSS feeds from webpages
|
123
120
|
test_files: []
|