scrapula 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +7 -0
  2. data/.gitignore +3 -0
  3. data/.rspec +1 -0
  4. data/.simplecov +1 -0
  5. data/CHANGELOG.md +15 -0
  6. data/CONTRIBUTING.md +0 -0
  7. data/Gemfile +24 -0
  8. data/Gemfile.lock +127 -0
  9. data/Guardfile +12 -0
  10. data/LICENSE +21 -0
  11. data/README.md +108 -0
  12. data/ROADMAP.md +42 -0
  13. data/Rakefile +30 -0
  14. data/examples/block_syntax.rb +20 -0
  15. data/examples/find_nodes.rb +6 -0
  16. data/examples/get_first_and_scrape_later.rb +13 -0
  17. data/examples/metas.rb +32 -0
  18. data/examples/more_api.rb +17 -0
  19. data/examples/nested_results.rb +14 -0
  20. data/examples/one_liners.rb +9 -0
  21. data/examples/posting_data.rb +7 -0
  22. data/examples/s.rb +24 -0
  23. data/examples/validation.rb +40 -0
  24. data/lib/scrapula.rb +47 -0
  25. data/lib/scrapula/_old_scraper.rb +110 -0
  26. data/lib/scrapula/agent.rb +8 -0
  27. data/lib/scrapula/data.rb +18 -0
  28. data/lib/scrapula/page.rb +109 -0
  29. data/lib/scrapula/page/meta.rb +74 -0
  30. data/lib/scrapula/request.rb +44 -0
  31. data/lib/scrapula/s.rb +21 -0
  32. data/lib/scrapula/scraper.rb +56 -0
  33. data/lib/scrapula/version.rb +3 -0
  34. data/scrapula.gemspec +36 -0
  35. data/spec/cassettes/Scrapula_Page_Meta/_.yml +748 -0
  36. data/spec/cassettes/Scrapula_Page_Meta/_/Open_Graph.yml +322 -0
  37. data/spec/cassettes/Scrapula_Page_Meta/_/other_names.yml +586 -0
  38. data/spec/cassettes/Scrapula_Page_Meta/_/standard_names.yml +429 -0
  39. data/spec/lib/scrapula/agent_spec.rb +6 -0
  40. data/spec/lib/scrapula/data_spec.rb +19 -0
  41. data/spec/lib/scrapula/page/meta_spec.rb +89 -0
  42. data/spec/lib/scrapula/page_spec.rb +136 -0
  43. data/spec/lib/scrapula/request_spec.rb +91 -0
  44. data/spec/lib/scrapula/s_spec.rb +44 -0
  45. data/spec/lib/scrapula/scraper_spec.rb +205 -0
  46. data/spec/lib/scrapula_spec.rb +141 -0
  47. data/spec/spec_helper.rb +26 -0
  48. metadata +118 -0
@@ -0,0 +1,141 @@
1
+ require 'scrapula'
2
+
3
+ describe Scrapula do
4
+
5
+ let(:url) { 'http://example.com' }
6
+
7
+ # TODO
8
+ %w[get].each do |http_method|
9
+
10
+ describe ".#{http_method}" do
11
+ it { is_expected.to respond_to http_method }
12
+
13
+ describe 'creates a request' do
14
+
15
+ let(:args) {
16
+ { url: url, params: [{ q: 'lol' }] }
17
+ }
18
+
19
+ let(:request_double) {
20
+ instance_double described_class::Request
21
+ }
22
+
23
+ let(:page_double) {
24
+ instance_double Scrapula::Page
25
+ }
26
+
27
+ let(:expect_new_request) {
28
+ allow(request_double).to receive :execute
29
+ expect(described_class::Request).to receive(:new).and_return request_double
30
+ }
31
+
32
+ let(:expect_page_response) {
33
+ expect_new_request
34
+ allow(request_double).to receive(:execute).and_return page_double
35
+ }
36
+
37
+ describe 'with the' do
38
+ context 'URL received as String' do
39
+ it 'without parameters' do
40
+ expect_new_request.with hash_including({ url: args[:url] })
41
+
42
+ described_class.__send__ http_method, args[:url]
43
+ end
44
+
45
+ it 'with parameters' do
46
+ expect_new_request.with hash_including args
47
+
48
+ described_class.__send__ http_method, args[:url], args[:params]
49
+ end
50
+ end
51
+
52
+ it 'arguments received as Hash' do
53
+ expect_new_request.with hash_including args
54
+
55
+ described_class.__send__ http_method, args
56
+ end
57
+ end
58
+
59
+ it "inferring the #{http_method.upcase} method" do
60
+ expect_new_request.with hash_including method: http_method
61
+
62
+ described_class.__send__ http_method, args
63
+ end
64
+
65
+ it 'and executes it' do
66
+ expect(request_double).to receive :execute
67
+ expect(described_class::Request).to receive(:new).and_return request_double
68
+
69
+ described_class.__send__ http_method, args
70
+ end
71
+
72
+ # TODO
73
+ describe 'when fails' do
74
+ end
75
+
76
+ describe 'after receiving the page' do
77
+
78
+ context 'without block' do
79
+ it 'returns that page' do
80
+ expect_page_response
81
+ expect(described_class.__send__ http_method, args).to eq page_double
82
+ end
83
+ end
84
+
85
+ context 'with block' do
86
+
87
+ it 'scrapes that page using that block' do
88
+ expect_page_response
89
+
90
+ block = proc{}
91
+
92
+ expect(page_double).to receive(:scrape) do |&block|
93
+ expect(block).to be block
94
+ end
95
+
96
+ described_class.__send__ http_method, args, &block
97
+ end
98
+
99
+ it 'returns the scrapped data' do
100
+ expect_page_response
101
+ expect(page_double).to receive(:scrape).and_return({ example: 'example value' })
102
+
103
+ result = described_class.__send__ http_method, args do
104
+ example '#example'
105
+ end
106
+
107
+ expect(result).to eq({ example: 'example value' })
108
+ end
109
+ end
110
+ end
111
+
112
+ end
113
+ end
114
+ end
115
+
116
+ describe '.meta' do
117
+ let(:metas) { double }
118
+ let(:page_double) { instance_double Scrapula::Page }
119
+
120
+ it 'performs a GET request' do
121
+ allow(page_double).to receive(:meta!).and_return metas
122
+ expect(described_class).to receive(:get).with(url).and_return page_double
123
+ described_class.meta url
124
+ end
125
+
126
+ it 'returns the meta elements of the page' do
127
+ expect(page_double).to receive(:meta!).and_return metas
128
+ allow(described_class).to receive(:get).with(url).and_return page_double
129
+ expect(described_class.meta url).to eq metas
130
+ end
131
+ end
132
+
133
+ # TODO
134
+ describe '.metas' do
135
+ end
136
+
137
+ describe '.configure' do
138
+
139
+ end
140
+
141
+ end
@@ -0,0 +1,26 @@
1
+ if ENV['COVERAGE']
2
+ puts "\n\t > COVERAGE ON"
3
+ require 'simplecov'
4
+ end
5
+
6
+ require 'vcr'
7
+
8
+ require_relative '../lib/scrapula'
9
+
10
+ RSpec.configure do |config|
11
+ config.order = 'random'
12
+
13
+ VCR.configure do |config|
14
+ config.cassette_library_dir = 'spec/cassettes'
15
+ config.hook_into :webmock
16
+ config.ignore_localhost = true
17
+ config.allow_http_connections_when_no_cassette = false
18
+ config.default_cassette_options = {
19
+ decode_compressed_response: true,
20
+ record: :new_episodes,
21
+ }
22
+ end
23
+
24
+ config.extend VCR::RSpec::Macros
25
+
26
+ end
metadata ADDED
@@ -0,0 +1,118 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: scrapula
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.6.3
5
+ platform: ruby
6
+ authors:
7
+ - Juan A. Martín Lucas
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mechanize
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.7'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 2.7.3
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '2.7'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 2.7.3
33
+ description: |2
34
+ Scrapula is a library for scraping web pages that simplifies some of the
35
+ common actions that are involved.
36
+
37
+ It has a very simple API that can be used in several ways and contexts, and
38
+ another, shorter, that facilitates processing pages when characters are
39
+ scarce, like irb / pry, or quick and dirty scripts.
40
+ email: scrapula@jaml.site
41
+ executables: []
42
+ extensions: []
43
+ extra_rdoc_files: []
44
+ files:
45
+ - ".gitignore"
46
+ - ".rspec"
47
+ - ".simplecov"
48
+ - CHANGELOG.md
49
+ - CONTRIBUTING.md
50
+ - Gemfile
51
+ - Gemfile.lock
52
+ - Guardfile
53
+ - LICENSE
54
+ - README.md
55
+ - ROADMAP.md
56
+ - Rakefile
57
+ - examples/block_syntax.rb
58
+ - examples/find_nodes.rb
59
+ - examples/get_first_and_scrape_later.rb
60
+ - examples/metas.rb
61
+ - examples/more_api.rb
62
+ - examples/nested_results.rb
63
+ - examples/one_liners.rb
64
+ - examples/posting_data.rb
65
+ - examples/s.rb
66
+ - examples/validation.rb
67
+ - lib/scrapula.rb
68
+ - lib/scrapula/_old_scraper.rb
69
+ - lib/scrapula/agent.rb
70
+ - lib/scrapula/data.rb
71
+ - lib/scrapula/page.rb
72
+ - lib/scrapula/page/meta.rb
73
+ - lib/scrapula/request.rb
74
+ - lib/scrapula/s.rb
75
+ - lib/scrapula/scraper.rb
76
+ - lib/scrapula/version.rb
77
+ - scrapula.gemspec
78
+ - spec/cassettes/Scrapula_Page_Meta/_.yml
79
+ - spec/cassettes/Scrapula_Page_Meta/_/Open_Graph.yml
80
+ - spec/cassettes/Scrapula_Page_Meta/_/other_names.yml
81
+ - spec/cassettes/Scrapula_Page_Meta/_/standard_names.yml
82
+ - spec/lib/scrapula/agent_spec.rb
83
+ - spec/lib/scrapula/data_spec.rb
84
+ - spec/lib/scrapula/page/meta_spec.rb
85
+ - spec/lib/scrapula/page_spec.rb
86
+ - spec/lib/scrapula/request_spec.rb
87
+ - spec/lib/scrapula/s_spec.rb
88
+ - spec/lib/scrapula/scraper_spec.rb
89
+ - spec/lib/scrapula_spec.rb
90
+ - spec/spec_helper.rb
91
+ homepage: http://github.com/j-a-m-l/scrapula
92
+ licenses:
93
+ - MIT
94
+ metadata: {}
95
+ post_install_message: |2
96
+ Thanks for installing, but keep in mind that this project is not complete!
97
+ rdoc_options:
98
+ - "--charset=UTF-8"
99
+ require_paths:
100
+ - lib
101
+ required_ruby_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ required_rubygems_version: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ requirements: []
112
+ rubyforge_project:
113
+ rubygems_version: 2.2.2
114
+ signing_key:
115
+ specification_version: 4
116
+ summary: Succinctest scraper in the world?
117
+ test_files: []
118
+ has_rdoc: