scrapouille 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +31 -3
- data/scrapouille.gemspec +3 -5
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c0edc7fb52550844caa8d189373a577b113462c8
|
4
|
+
data.tar.gz: 5b76257e9540d9f92e46d502d8346518fba1589b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4b1924dab7efceb32eea3a684e6a42a294d746ed98f8bd30096640e100e4c54419d8f3dfad1af5492a60d19ae1cb77594cb4c28a0f3e8e3f0451a4176e05adeb
|
7
|
+
data.tar.gz: ca5405c1da48f22add8cb924df2df458d7ab3470b2e017f87ac4d17dde677fe0794ae03751ab172fd41eac6eb623ebc7108c0c5405d63b63b77c45f8f70dcb9d
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -1,7 +1,35 @@
|
|
1
|
-
Declarative scraper
|
1
|
+
Declarative HTML scraper
|
2
2
|
|
3
3
|
# Usage
|
4
4
|
|
5
|
-
Test
|
5
|
+
### Test
|
6
|
+
|
7
|
+
rake
|
8
|
+
|
9
|
+
### Scrap
|
10
|
+
|
11
|
+
Define a scraper
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
scraper = Scrapouille.new do
|
15
|
+
scrap 'fullname', at: "//div[@class='player-name']/h1/child::text()"
|
16
|
+
scrap 'image_url', at: "//div[@id='basic']//img/attribute::src"
|
17
|
+
scrap 'rank', at: "//div[@class='position']/text()" do |c|
|
18
|
+
Integer(c.sub('#', ''))
|
19
|
+
end
|
20
|
+
end
|
21
|
+
```
|
22
|
+
|
23
|
+
Use your scraper instance on a URI (as defined by `open-uri`: filepath, http, ...)
|
24
|
+
|
25
|
+
```ruby
|
26
|
+
results = scraper.scrap!('http://tennis-player.com/richard-gasquet')
|
27
|
+
results['fullname'] # => 'Richard Gasquet'
|
28
|
+
```
|
29
|
+
|
30
|
+
You can test your xpath expression with a local HTML filepath
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
scraper.scrap!(File.join('..', 'player.html'))
|
34
|
+
```
|
6
35
|
|
7
|
-
rake
|
data/scrapouille.gemspec
CHANGED
@@ -1,13 +1,11 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
lib = File.expand_path('../lib', __FILE__)
|
3
|
-
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
2
|
|
5
3
|
Gem::Specification.new do |spec|
|
6
4
|
spec.name = "scrapouille"
|
7
|
-
spec.version = "0.0.1"
|
5
|
+
spec.version = "0.0.2"
|
8
6
|
spec.authors = ["simcap"]
|
9
|
-
spec.summary = %q{Simpe declarative scrapper}
|
10
|
-
spec.description = %q{Simpe declarative scrapper}
|
7
|
+
spec.summary = %q{Simpe declarative HTML scrapper}
|
8
|
+
spec.description = %q{Simpe declarative HTML scrapper}
|
11
9
|
spec.homepage = "https://github.com/simcap/scrapouille"
|
12
10
|
|
13
11
|
spec.files = `git ls-files -z`.split("\x0")
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrapouille
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- simcap
|
@@ -52,7 +52,7 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
-
description: Simpe declarative scrapper
|
55
|
+
description: Simpe declarative HTML scrapper
|
56
56
|
email:
|
57
57
|
executables: []
|
58
58
|
extensions: []
|
@@ -91,7 +91,7 @@ rubyforge_project:
|
|
91
91
|
rubygems_version: 2.2.2
|
92
92
|
signing_key:
|
93
93
|
specification_version: 4
|
94
|
-
summary: Simpe declarative scrapper
|
94
|
+
summary: Simpe declarative HTML scrapper
|
95
95
|
test_files:
|
96
96
|
- test/fixtures/tennis-player.html
|
97
97
|
- test/helper.rb
|