scrapula 0.6.3
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +3 -0
- data/.rspec +1 -0
- data/.simplecov +1 -0
- data/CHANGELOG.md +15 -0
- data/CONTRIBUTING.md +0 -0
- data/Gemfile +24 -0
- data/Gemfile.lock +127 -0
- data/Guardfile +12 -0
- data/LICENSE +21 -0
- data/README.md +108 -0
- data/ROADMAP.md +42 -0
- data/Rakefile +30 -0
- data/examples/block_syntax.rb +20 -0
- data/examples/find_nodes.rb +6 -0
- data/examples/get_first_and_scrape_later.rb +13 -0
- data/examples/metas.rb +32 -0
- data/examples/more_api.rb +17 -0
- data/examples/nested_results.rb +14 -0
- data/examples/one_liners.rb +9 -0
- data/examples/posting_data.rb +7 -0
- data/examples/s.rb +24 -0
- data/examples/validation.rb +40 -0
- data/lib/scrapula.rb +47 -0
- data/lib/scrapula/_old_scraper.rb +110 -0
- data/lib/scrapula/agent.rb +8 -0
- data/lib/scrapula/data.rb +18 -0
- data/lib/scrapula/page.rb +109 -0
- data/lib/scrapula/page/meta.rb +74 -0
- data/lib/scrapula/request.rb +44 -0
- data/lib/scrapula/s.rb +21 -0
- data/lib/scrapula/scraper.rb +56 -0
- data/lib/scrapula/version.rb +3 -0
- data/scrapula.gemspec +36 -0
- data/spec/cassettes/Scrapula_Page_Meta/_.yml +748 -0
- data/spec/cassettes/Scrapula_Page_Meta/_/Open_Graph.yml +322 -0
- data/spec/cassettes/Scrapula_Page_Meta/_/other_names.yml +586 -0
- data/spec/cassettes/Scrapula_Page_Meta/_/standard_names.yml +429 -0
- data/spec/lib/scrapula/agent_spec.rb +6 -0
- data/spec/lib/scrapula/data_spec.rb +19 -0
- data/spec/lib/scrapula/page/meta_spec.rb +89 -0
- data/spec/lib/scrapula/page_spec.rb +136 -0
- data/spec/lib/scrapula/request_spec.rb +91 -0
- data/spec/lib/scrapula/s_spec.rb +44 -0
- data/spec/lib/scrapula/scraper_spec.rb +205 -0
- data/spec/lib/scrapula_spec.rb +141 -0
- data/spec/spec_helper.rb +26 -0
- metadata +118 -0
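
Only the spec files and the gem metadata are expanded below; the library files are listed above but their diffs are not shown here. As a rough orientation before reading `spec/lib/scrapula_spec.rb`, the behaviour it asserts suggests usage along these lines (a hypothetical sketch inferred from the expectations in the spec, not an excerpt from the gem's README):

```ruby
require 'scrapula'

# A plain GET returns the fetched page object.
page = Scrapula.get 'http://example.com'

# Query parameters can be passed alongside the URL, and a block causes the
# page to be scraped; the scraped data is what the call returns.
data = Scrapula.get 'http://example.com', [{ q: 'lol' }] do
  example '#example' # hypothetical scraping rule, mirroring the block used in the spec
end

# Scrapula.meta performs a GET and returns the page's meta elements.
metas = Scrapula.meta 'http://example.com'
```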
data/spec/lib/scrapula_spec.rb
ADDED
@@ -0,0 +1,141 @@
+require 'scrapula'
+
+describe Scrapula do
+
+  let(:url) { 'http://example.com' }
+
+  # TODO
+  %w[get].each do |http_method|
+
+    describe ".#{http_method}" do
+      it { is_expected.to respond_to http_method }
+
+      describe 'creates a request' do
+
+        let(:args) {
+          { url: url, params: [{ q: 'lol' }] }
+        }
+
+        let(:request_double) {
+          instance_double described_class::Request
+        }
+
+        let(:page_double) {
+          instance_double Scrapula::Page
+        }
+
+        let(:expect_new_request) {
+          allow(request_double).to receive :execute
+          expect(described_class::Request).to receive(:new).and_return request_double
+        }
+
+        let(:expect_page_response) {
+          expect_new_request
+          allow(request_double).to receive(:execute).and_return page_double
+        }
+
+        describe 'with the' do
+          context 'URL received as String' do
+            it 'without parameters' do
+              expect_new_request.with hash_including({ url: args[:url] })
+
+              described_class.__send__ http_method, args[:url]
+            end
+
+            it 'with parameters' do
+              expect_new_request.with hash_including args
+
+              described_class.__send__ http_method, args[:url], args[:params]
+            end
+          end
+
+          it 'arguments received as Hash' do
+            expect_new_request.with hash_including args
+
+            described_class.__send__ http_method, args
+          end
+        end
+
+        it "inferring the #{http_method.upcase} method" do
+          expect_new_request.with hash_including method: http_method
+
+          described_class.__send__ http_method, args
+        end
+
+        it 'and executes it' do
+          expect(request_double).to receive :execute
+          expect(described_class::Request).to receive(:new).and_return request_double
+
+          described_class.__send__ http_method, args
+        end
+
+        # TODO
+        describe 'when fails' do
+        end
+
+        describe 'after receiving the page' do
+
+          context 'without block' do
+            it 'returns that page' do
+              expect_page_response
+              expect(described_class.__send__ http_method, args).to eq page_double
+            end
+          end
+
+          context 'with block' do
+
+            it 'scrapes that page using that block' do
+              expect_page_response
+
+              block = proc{}
+
+              expect(page_double).to receive(:scrape) do |&block|
+                expect(block).to be block
+              end
+
+              described_class.__send__ http_method, args, &block
+            end
+
+            it 'returns the scrapped data' do
+              expect_page_response
+              expect(page_double).to receive(:scrape).and_return({ example: 'example value' })
+
+              result = described_class.__send__ http_method, args do
+                example '#example'
+              end
+
+              expect(result).to eq({ example: 'example value' })
+            end
+          end
+        end
+
+      end
+    end
+  end
+
+  describe '.meta' do
+    let(:metas) { double }
+    let(:page_double) { instance_double Scrapula::Page }
+
+    it 'performs a GET request' do
+      allow(page_double).to receive(:meta!).and_return metas
+      expect(described_class).to receive(:get).with(url).and_return page_double
+      described_class.meta url
+    end
+
+    it 'returns the meta elements of the page' do
+      expect(page_double).to receive(:meta!).and_return metas
+      allow(described_class).to receive(:get).with(url).and_return page_double
+      expect(described_class.meta url).to eq metas
+    end
+  end
+
+  # TODO
+  describe '.metas' do
+  end
+
+  describe '.configure' do
+
+  end
+
+end
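
The spec above never exercises the real `lib/scrapula/request.rb`; it replaces it with an `instance_double` and only pins down its surface. For reference, the contract those stubs assume is roughly the following (a minimal stand-in written for illustration, not the shipped implementation):

```ruby
# Stand-in for the interface scrapula_spec.rb stubs via
# instance_double(Scrapula::Request): .new receives a Hash containing
# :url, :params and :method, and #execute returns the fetched page.
class RequestStandIn
  def initialize(options)
    @url    = options.fetch(:url)
    @method = options.fetch(:method, 'get')
    @params = options.fetch(:params, [])
  end

  # The real Request presumably performs the HTTP call here and returns a
  # Scrapula::Page; the spec replaces that with a page double.
  def execute
    :page_placeholder
  end
end

RequestStandIn.new(url: 'http://example.com', method: 'get', params: [{ q: 'lol' }]).execute
```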
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,26 @@
+if ENV['COVERAGE']
+  puts "\n\t > COVERAGE ON"
+  require 'simplecov'
+end
+
+require 'vcr'
+
+require_relative '../lib/scrapula'
+
+RSpec.configure do |config|
+  config.order = 'random'
+
+  VCR.configure do |config|
+    config.cassette_library_dir = 'spec/cassettes'
+    config.hook_into :webmock
+    config.ignore_localhost = true
+    config.allow_http_connections_when_no_cassette = false
+    config.default_cassette_options = {
+      decode_compressed_response: true,
+      record: :new_episodes,
+    }
+  end
+
+  config.extend VCR::RSpec::Macros
+
+end
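
`config.extend VCR::RSpec::Macros` points at the VCR 2.x macro module (removed in VCR 3), which adds a class-level `use_vcr_cassette` helper to example groups. Assuming that VCR version, a spec driven through this helper could record and replay HTTP traffic roughly like this (an illustrative sketch, not one of the shipped specs):

```ruby
require_relative 'spec_helper'

describe 'a recorded example.com request' do
  # Records into spec/cassettes/ on the first run and replays afterwards,
  # honouring the record: :new_episodes default configured above.
  use_vcr_cassette

  it 'fetches the page through the cassette' do
    expect(Scrapula.get 'http://example.com').not_to be_nil
  end
end
```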
metadata
ADDED
@@ -0,0 +1,118 @@
+--- !ruby/object:Gem::Specification
+name: scrapula
+version: !ruby/object:Gem::Version
+  version: 0.6.3
+platform: ruby
+authors:
+- Juan A. Martín Lucas
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2015-09-17 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: mechanize
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '2.7'
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 2.7.3
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '2.7'
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 2.7.3
+description: |2
+    Scrapula is a library for scraping web pages that simplifies some of the
+    common actions that are involved.
+
+    It has a very simple API that can be used in several ways and contexts, and
+    another, shorter, that facilitates processing pages when characters are
+    scarce, like irb / pry, or quick and dirty scripts.
+email: scrapula@jaml.site
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- ".gitignore"
+- ".rspec"
+- ".simplecov"
+- CHANGELOG.md
+- CONTRIBUTING.md
+- Gemfile
+- Gemfile.lock
+- Guardfile
+- LICENSE
+- README.md
+- ROADMAP.md
+- Rakefile
+- examples/block_syntax.rb
+- examples/find_nodes.rb
+- examples/get_first_and_scrape_later.rb
+- examples/metas.rb
+- examples/more_api.rb
+- examples/nested_results.rb
+- examples/one_liners.rb
+- examples/posting_data.rb
+- examples/s.rb
+- examples/validation.rb
+- lib/scrapula.rb
+- lib/scrapula/_old_scraper.rb
+- lib/scrapula/agent.rb
+- lib/scrapula/data.rb
+- lib/scrapula/page.rb
+- lib/scrapula/page/meta.rb
+- lib/scrapula/request.rb
+- lib/scrapula/s.rb
+- lib/scrapula/scraper.rb
+- lib/scrapula/version.rb
+- scrapula.gemspec
+- spec/cassettes/Scrapula_Page_Meta/_.yml
+- spec/cassettes/Scrapula_Page_Meta/_/Open_Graph.yml
+- spec/cassettes/Scrapula_Page_Meta/_/other_names.yml
+- spec/cassettes/Scrapula_Page_Meta/_/standard_names.yml
+- spec/lib/scrapula/agent_spec.rb
+- spec/lib/scrapula/data_spec.rb
+- spec/lib/scrapula/page/meta_spec.rb
+- spec/lib/scrapula/page_spec.rb
+- spec/lib/scrapula/request_spec.rb
+- spec/lib/scrapula/s_spec.rb
+- spec/lib/scrapula/scraper_spec.rb
+- spec/lib/scrapula_spec.rb
+- spec/spec_helper.rb
+homepage: http://github.com/j-a-m-l/scrapula
+licenses:
+- MIT
+metadata: {}
+post_install_message: |2
+  Thanks for installing, but keep in mind that this project is not complete!
+rdoc_options:
+- "--charset=UTF-8"
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.2.2
+signing_key:
+specification_version: 4
+summary: Succinctest scraper in the world?
+test_files: []
+has_rdoc:
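
The nested `Gem::Requirement` entries above are simply the generated form of a compound version constraint on mechanize; in `scrapula.gemspec` terms it would correspond to a declaration along these lines (reconstructed for illustration, not copied from the shipped gemspec):

```ruby
Gem::Specification.new do |spec|
  spec.name    = 'scrapula'
  spec.version = '0.6.3'

  # "~> 2.7" allows any 2.x release below 3.0; combined with ">= 2.7.3"
  # the effective range is 2.7.3 <= version < 3.0.
  spec.add_runtime_dependency 'mechanize', '~> 2.7', '>= 2.7.3'
end
```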