scrapouille 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +31 -3
- data/scrapouille.gemspec +3 -5
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c0edc7fb52550844caa8d189373a577b113462c8
|
4
|
+
data.tar.gz: 5b76257e9540d9f92e46d502d8346518fba1589b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4b1924dab7efceb32eea3a684e6a42a294d746ed98f8bd30096640e100e4c54419d8f3dfad1af5492a60d19ae1cb77594cb4c28a0f3e8e3f0451a4176e05adeb
|
7
|
+
data.tar.gz: ca5405c1da48f22add8cb924df2df458d7ab3470b2e017f87ac4d17dde677fe0794ae03751ab172fd41eac6eb623ebc7108c0c5405d63b63b77c45f8f70dcb9d
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -1,7 +1,35 @@
|
|
1
|
-
Declarative scraper
|
1
|
+
Declarative HTML scraper
|
2
2
|
|
3
3
|
# Usage
|
4
4
|
|
5
|
-
Test
|
5
|
+
### Test
|
6
|
+
|
7
|
+
rake
|
8
|
+
|
9
|
+
### Scrap
|
10
|
+
|
11
|
+
Define a scraper
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
scraper = Scrapouille.new do
|
15
|
+
scrap 'fullname', at: "//div[@class='player-name']/h1/child::text()"
|
16
|
+
scrap 'image_url', at: "//div[@id='basic']//img/attribute::src"
|
17
|
+
scrap 'rank', at: "//div[@class='position']/text()" do |c|
|
18
|
+
Integer(c.sub('#', ''))
|
19
|
+
end
|
20
|
+
end
|
21
|
+
```
|
22
|
+
|
23
|
+
Use your scraper instance on a URI (as defined by `open-uri`: filepath, http, ...)
|
24
|
+
|
25
|
+
```ruby
|
26
|
+
results = scraper.scrap!('http://tennis-player.com/richard-gasquet')
|
27
|
+
results['fullname'] # => 'Richard Gasquet'
|
28
|
+
```
|
29
|
+
|
30
|
+
You can test your XPath expressions against a local HTML file path
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
scraper.scrap!(File.join('..', 'player.html'))
|
34
|
+
```
|
6
35
|
|
7
|
-
rake
|
data/scrapouille.gemspec
CHANGED
@@ -1,13 +1,11 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
lib = File.expand_path('../lib', __FILE__)
|
3
|
-
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
2
|
|
5
3
|
Gem::Specification.new do |spec|
|
6
4
|
spec.name = "scrapouille"
|
7
|
-
spec.version = "0.0.1"
|
5
|
+
spec.version = "0.0.2"
|
8
6
|
spec.authors = ["simcap"]
|
9
|
-
spec.summary = %q{Simpe declarative scrapper}
|
10
|
-
spec.description = %q{Simpe declarative scrapper}
|
7
|
+
spec.summary = %q{Simpe declarative HTML scrapper}
|
8
|
+
spec.description = %q{Simpe declarative HTML scrapper}
|
11
9
|
spec.homepage = "https://github.com/simcap/scrapouille"
|
12
10
|
|
13
11
|
spec.files = `git ls-files -z`.split("\x0")
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrapouille
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- simcap
|
@@ -52,7 +52,7 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
-
description: Simpe declarative scrapper
|
55
|
+
description: Simpe declarative HTML scrapper
|
56
56
|
email:
|
57
57
|
executables: []
|
58
58
|
extensions: []
|
@@ -91,7 +91,7 @@ rubyforge_project:
|
|
91
91
|
rubygems_version: 2.2.2
|
92
92
|
signing_key:
|
93
93
|
specification_version: 4
|
94
|
-
summary: Simpe declarative scrapper
|
94
|
+
summary: Simpe declarative HTML scrapper
|
95
95
|
test_files:
|
96
96
|
- test/fixtures/tennis-player.html
|
97
97
|
- test/helper.rb
|