horsefield 0.6.1 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/horsefield/diggable.rb +14 -1
- data/lib/horsefield/version.rb +1 -1
- data/test/horsefield/test_scraper.rb +14 -2
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 361f824eedceebb4366e8d2a8041b15e9344424c801993059e435af201449442
|
4
|
+
data.tar.gz: 6bce024ad4503a6dbca19e3f5ca58ba6ae3cb4216c151ed3177e0eeb0cc3c028
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e966a5d390b4928ab479077fa31efd4a4df4471c8b1cb75fc4ea2e22054f6ae0792c01bc6033824022cea5d12d6e12094327e5cc4bb9046808364a896f17e19e
|
7
|
+
data.tar.gz: eee3aedbfa542d46573952aac4ea4a986901c2640bcc2fc7961d4de20fc1764f59d925df94139654479c2abc952758d24d11ddce36c8736d899486fa3b3fafcb
|
data/lib/horsefield/diggable.rb
CHANGED
@@ -24,7 +24,20 @@ module Horsefield
|
|
24
24
|
raise MissingSelectorError, "Couldn't find required selector (#{selector})" if lookup == :required && !doc
|
25
25
|
return fields if lookup == :presence && !doc
|
26
26
|
|
27
|
-
|
27
|
+
if block
|
28
|
+
# Process the sub-document
|
29
|
+
sub_doc = doc && doc.with_fresh_fields
|
30
|
+
|
31
|
+
# Run the block to populate fields and get its return value
|
32
|
+
return_value = sub_doc && sub_doc.instance_eval(&block)
|
33
|
+
|
34
|
+
# Use fields if they were populated, otherwise use the block's return value
|
35
|
+
value = (sub_doc && !sub_doc.fields.empty?) ? sub_doc.fields : return_value
|
36
|
+
|
37
|
+
fields.merge!(Hash[[[name, value]]])
|
38
|
+
else
|
39
|
+
fields.merge!(Hash[[[name, doc && doc.text.strip]]])
|
40
|
+
end
|
28
41
|
end
|
29
42
|
|
30
43
|
def many!(name, selector, &block)
|
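data/lib/horsefield/version.rb
CHANGED
(hunk not included in this extract; the file summary above lists it as +1 -1)
data/test/horsefield/test_scraper.rb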
CHANGED
@@ -1,9 +1,19 @@
|
|
1
|
-
|
1
|
+
require_relative '../test_helper'
|
2
2
|
require 'pry'
|
3
3
|
|
4
4
|
class RedditScraper
|
5
5
|
include Horsefield::Scraper
|
6
6
|
|
7
|
+
one :meta do
|
8
|
+
[:keywords].each do |name|
|
9
|
+
one name, ".//meta[@name='#{name}']/@content"
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
one :static do
|
14
|
+
"test"
|
15
|
+
end
|
16
|
+
|
7
17
|
many :posts, '#siteTable .thing' do
|
8
18
|
one :title, 'a.title'
|
9
19
|
one :tagline, 'p.tagline' do
|
@@ -24,6 +34,8 @@ class TestScraper < Minitest::Test
|
|
24
34
|
|
25
35
|
def test_scraper
|
26
36
|
reddit = RedditScraper.new(@reddit_html).scrape
|
27
|
-
|
37
|
+
assert_equal "reddit, reddit.com, vote, comment, submit", reddit[:meta][:keywords]
|
38
|
+
assert_equal "Chris Pratt, homeless, living in this van, holding the script to his first acting job", reddit[:posts][0][:title]
|
39
|
+
assert_equal "test", reddit[:static]
|
28
40
|
end
|
29
41
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: horsefield
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.1
|
4
|
+
version: 0.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Erik Strömberg
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-05-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -94,7 +94,7 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
-
description:
|
97
|
+
description:
|
98
98
|
email:
|
99
99
|
- erik.stromberg@gmail.com
|
100
100
|
executables: []
|
@@ -124,7 +124,7 @@ homepage: http://github.com/apa512/horsefield
|
|
124
124
|
licenses:
|
125
125
|
- MIT
|
126
126
|
metadata: {}
|
127
|
-
post_install_message:
|
127
|
+
post_install_message:
|
128
128
|
rdoc_options: []
|
129
129
|
require_paths:
|
130
130
|
- lib
|
@@ -139,8 +139,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
139
139
|
- !ruby/object:Gem::Version
|
140
140
|
version: '0'
|
141
141
|
requirements: []
|
142
|
-
rubygems_version: 3.
|
143
|
-
signing_key:
|
142
|
+
rubygems_version: 3.5.22
|
143
|
+
signing_key:
|
144
144
|
specification_version: 4
|
145
145
|
summary: It's a scraper
|
146
146
|
test_files:
|