horsefield 0.6.1 → 0.7.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7233d84653e5fb4eb1183f6f12f4c2a8fb8d4fbec40a103a0d60cdfea85e215c
4
- data.tar.gz: 308033f17e37939d4f60da0a3ef431afd123cd2399c82673bab7376ed6808b8b
3
+ metadata.gz: 361f824eedceebb4366e8d2a8041b15e9344424c801993059e435af201449442
4
+ data.tar.gz: 6bce024ad4503a6dbca19e3f5ca58ba6ae3cb4216c151ed3177e0eeb0cc3c028
5
5
  SHA512:
6
- metadata.gz: e0b42e0f74571b90b5e369efdc48eb3e225984d54e4f8a9eed4edf81b44c6d240d36a3fb5f6cd23d0d1d53bf047fe6f8555bd2bc8a24fe46d5dd1c743eb82303
7
- data.tar.gz: 3f5374c3f6f843ab29e21e95eb6c723eefd10a8c4be7cf3daa9698ca172961056f6eb5102e4cdd14e7912fe771b98690014e6e1ed39edcf19424e873c68491ab
6
+ metadata.gz: e966a5d390b4928ab479077fa31efd4a4df4471c8b1cb75fc4ea2e22054f6ae0792c01bc6033824022cea5d12d6e12094327e5cc4bb9046808364a896f17e19e
7
+ data.tar.gz: eee3aedbfa542d46573952aac4ea4a986901c2640bcc2fc7961d4de20fc1764f59d925df94139654479c2abc952758d24d11ddce36c8736d899486fa3b3fafcb
@@ -24,7 +24,20 @@ module Horsefield
24
24
  raise MissingSelectorError, "Couldn't find required selector (#{selector})" if lookup == :required && !doc
25
25
  return fields if lookup == :presence && !doc
26
26
 
27
- fields.merge!(Hash[[[name, doc && doc.with_fresh_fields.instance_eval(&processor(&block))]]])
27
+ if block
28
+ # Process the sub-document
29
+ sub_doc = doc && doc.with_fresh_fields
30
+
31
+ # Run the block to populate fields and get its return value
32
+ return_value = sub_doc && sub_doc.instance_eval(&block)
33
+
34
+ # Use fields if they were populated, otherwise use the block's return value
35
+ value = (sub_doc && !sub_doc.fields.empty?) ? sub_doc.fields : return_value
36
+
37
+ fields.merge!(Hash[[[name, value]]])
38
+ else
39
+ fields.merge!(Hash[[[name, doc && doc.text.strip]]])
40
+ end
28
41
  end
29
42
 
30
43
  def many!(name, selector, &block)
@@ -1,3 +1,3 @@
1
1
  module Horsefield
2
- VERSION = "0.6.1"
2
+ VERSION = "0.7.1"
3
3
  end
@@ -1,9 +1,19 @@
1
- require 'test_helper'
1
+ require_relative '../test_helper'
2
2
  require 'pry'
3
3
 
4
4
  class RedditScraper
5
5
  include Horsefield::Scraper
6
6
 
7
+ one :meta do
8
+ [:keywords].each do |name|
9
+ one name, ".//meta[@name='#{name}']/@content"
10
+ end
11
+ end
12
+
13
+ one :static do
14
+ "test"
15
+ end
16
+
7
17
  many :posts, '#siteTable .thing' do
8
18
  one :title, 'a.title'
9
19
  one :tagline, 'p.tagline' do
@@ -24,6 +34,8 @@ class TestScraper < Minitest::Test
24
34
 
25
35
  def test_scraper
26
36
  reddit = RedditScraper.new(@reddit_html).scrape
27
- p reddit[:posts].first[:tagline]
37
+ assert_equal "reddit, reddit.com, vote, comment, submit", reddit[:meta][:keywords]
38
+ assert_equal "Chris Pratt, homeless, living in this van, holding the script to his first acting job", reddit[:posts][0][:title]
39
+ assert_equal "test", reddit[:static]
28
40
  end
29
41
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: horsefield
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Erik Strömberg
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-05-29 00:00:00.000000000 Z
11
+ date: 2025-05-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -94,7 +94,7 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
- description:
97
+ description:
98
98
  email:
99
99
  - erik.stromberg@gmail.com
100
100
  executables: []
@@ -124,7 +124,7 @@ homepage: http://github.com/apa512/horsefield
124
124
  licenses:
125
125
  - MIT
126
126
  metadata: {}
127
- post_install_message:
127
+ post_install_message:
128
128
  rdoc_options: []
129
129
  require_paths:
130
130
  - lib
@@ -139,8 +139,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
139
139
  - !ruby/object:Gem::Version
140
140
  version: '0'
141
141
  requirements: []
142
- rubygems_version: 3.1.2
143
- signing_key:
142
+ rubygems_version: 3.5.22
143
+ signing_key:
144
144
  specification_version: 4
145
145
  summary: It's a scraper
146
146
  test_files: