dogshoe 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +30 -0
- data/lib/dog_shoe.rb +4 -0
- data/lib/dog_shoe/base.rb +50 -0
- data/lib/dog_shoe/table.rb +48 -0
- data/lib/dog_shoe/ui.rb +0 -0
- data/lib/dog_shoe/version.rb +3 -0
- data/spec/base_spec.rb +30 -0
- data/spec/html/table-with-headers.html +31 -0
- data/spec/html/table.html +39 -0
- data/spec/models/eg_table.rb +7 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/table_spec.rb +24 -0
- metadata +82 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 073f3b5eeebc94df2ba0b816f19b8e48ada3fb34
|
4
|
+
data.tar.gz: 7616865d53111a61e6476833ff325e4f94e762f4
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2ebfe357c0787a5fed3d6f738a3274c3b2aa164470e681da18c0abe7f3654d5b4ca2500a8c4dded6bbf805abd804263174584b2283481035862a5b7dd6a9c20a
|
7
|
+
data.tar.gz: f107343ec884c130b1784ab3c01981350f56e69be22b2a24145d3abe43e3536ad377958da3cd78bb31ee63d4e07b041be9e4e0b5c0bcd42f22850a6549aad4ee
|
data/README.md
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# dogshoe
|
2
|
+
Ruby ORM backed by scraped websites
|
3
|
+
|
4
|
+
Say we are given the following HTML on anyotherwebpage.com
|
5
|
+
|
6
|
+
<table class="some_class">
|
7
|
+
<tr>
|
8
|
+
<th>Some Thing</th><th>Some Thing Else</th>
|
9
|
+
</tr>
|
10
|
+
<tr>
|
11
|
+
<td>first value</td><td>1000</td>
|
12
|
+
</tr>
|
13
|
+
<tr>
|
14
|
+
<td>second value</td><td>2000</td>
|
15
|
+
</tr>
|
16
|
+
</table>
|
17
|
+
|
18
|
+
Let's stop being polite, and start modeling!
|
19
|
+
|
20
|
+
things = DogShoe::Table.find(
|
21
|
+
url: 'http://anyotherwebpage.com',
|
22
|
+
css: '.some_class'
|
23
|
+
)
|
24
|
+
|
25
|
+
=> [#<DogShoe::Table>, #<DogShoe::Table>]
|
26
|
+
|
27
|
+
|
28
|
+
things.first.some_thing
|
29
|
+
|
30
|
+
=> 'first value'
|
data/lib/dog_shoe.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module DogShoe
  # Base record for models hydrated from scraped HTML.
  #
  # An instance wraps one scraped record: an ordered list of values plus an
  # ordered list of attribute names (keys). A value can be read by position
  # with #[] or by name through a dynamic reader (see #method_missing).
  #
  # The class-level helpers (parse, fetch, css, find) keep the most recently
  # fetched document in a class-level instance variable, so they are shared
  # state on the class rather than per-instance state.
  class Base
    # Matches strings that start with a URI scheme such as "http://".
    REMOTE_URI = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}

    # NOTE(review): within this class @doc is only ever assigned on the class
    # (by .find), so this instance-level reader returns nil unless something
    # else sets @doc on the instance.
    attr_reader :doc

    # @param attributes [Hash] :values is the ordered list of field values,
    #   :keys the matching ordered list of attribute names (anything that
    #   responds to #to_sym). Both are optional and default to empty lists so
    #   that `Base.new` works with its own default argument.
    def initialize(attributes = {})
      @values = attributes[:values] || []
      @keys = Array(attributes[:keys]).map(&:to_sym)
    end

    # Positional access to the stored values.
    #
    # @param key [Integer] index into the values list
    # @return the value stored at that index
    def [](key)
      @values[key]
    end

    class << self
      # Parses an HTML string or IO into a Nokogiri document.
      def parse(body)
        Nokogiri::HTML(body)
      end

      # Opens +uri+ for reading and returns the opened IO-like object.
      #
      # Remote URIs are opened through open-uri's URI#open instead of the
      # bare Kernel#open the code used before: since Ruby 3.0 open-uri no
      # longer patches Kernel#open to understand URLs, and Kernel#open would
      # also run a string starting with "|" as a subprocess. Anything that
      # is not a remote URI is opened as a local file.
      #
      # @param uri [String, #open] URL, local path, or an object with #open
      def fetch(uri)
        return uri.open if uri.respond_to?(:open)

        if uri.respond_to?(:to_str) && REMOTE_URI =~ uri.to_str
          remote = URI.parse(uri.to_str)
          return remote.open if remote.respond_to?(:open)
        end

        File.open(uri)
      end

      # Runs a CSS selector against the document loaded by the last .find.
      def css(selector)
        @doc.css(selector)
      end

      # Class-level values; in this class nothing assigns them, subclasses
      # may (DogShoe::Table does).
      def values
        @values
      end

      # Fetches attrs[:url], parses it and returns whatever the CSS selector
      # attrs[:css] matches in the resulting document. The raw match result
      # is returned; it is not wrapped in model instances here.
      #
      # @param attrs [Hash] :url and :css
      def find(attrs)
        @doc = parse(fetch(attrs[:url]))
        css(attrs[:css])
      end
    end

    # Dynamic attribute readers: calling a method named after one of the keys
    # returns the value stored at that key's position. Unknown names fall
    # through to the default NoMethodError.
    def method_missing(name, *args, &block)
      position = @keys.index(name)
      return super if position.nil?

      @values[position]
    end

    # Keeps respond_to? / method(:name) consistent with #method_missing.
    def respond_to_missing?(name, include_private = false)
      @keys.include?(name) || super
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module DogShoe
  # Model whose records are the rows of an HTML <table>.
  #
  # The text of the table's <th> cells becomes the attribute names; every
  # <tr> that contains at least one <td> becomes one record.
  class Table < Base

    class << self
      # Fetches attrs[:url], selects the table with attrs[:css] and returns
      # one Table instance per data row.
      #
      # @param attrs [Hash] :url and :css
      # @return [Array<Table>]
      def find(attrs)
        @doc = parse(fetch(attrs[:url]))
        # css also refreshes @keys as a side effect (via table_to_magic).
        rows = css(attrs[:css])
        rows.map { |row| new(values: row, keys: @keys) }
      end

      # Selects nodes with +selector+ and converts them to rows of cell text.
      #
      # @param selector [String] CSS selector for the table
      # @param args [Array<Symbol>] conversion flags; only :numeric is used
      # @return [Array<Array>] one array of cell values per data row
      def css(selector, args = [])
        table_to_magic(super(selector), *args)
      end

      # Extracts header names and row values from +table+, stores them in the
      # class-level @keys / @values and returns the rows.
      #
      # @param table anything responding to css('th') / css('tr') the way a
      #   Nokogiri node set does
      # @param args [Array<Symbol>] pass :numeric to convert every cell to a
      #   Float
      def table_to_magic(table, *args)
        @keys = table.css('th').map { |th| attribute_name(th.text) }

        rows = table.css('tr').map { |tr| tr.css('td').map(&:text) }
        # Rows without any <td> (for example a header-only row) are dropped.
        rows = rows.reject(&:empty?)

        if args.include?(:numeric)
          rows = rows.map { |cells| cells.map { |cell| numeric_value(cell) } }
        end

        # TODO: raise a DogShoe::NotFound error when nothing was found.
        @values = rows
      end

      # Removes every newline and the leading/trailing whitespace of +text+.
      def scrub(text)
        text.delete("\n").sub(/\A\s+/, '').sub(/\s+\z/, '')
      end

      private

      # Turns header text into an attribute name: scrubbed, stripped of
      # everything but ASCII letters, digits and whitespace, whitespace
      # replaced by underscores, lower-cased ("Some Thing" -> "some_thing").
      def attribute_name(text)
        scrub(text).gsub(/[^0-9a-zA-Z\s]/, '').gsub(/\s/, '_').downcase
      end

      # Converts cell text to a Float. Decimal points and minus signs are
      # kept: stripping them as before turned "$2.50" into 250.0 and "-5"
      # into 5.0. Thousands separators and currency symbols are removed.
      def numeric_value(text)
        text.gsub(/[^0-9.\-]/, '').to_f
      end
    end

  end
end
|
data/lib/dog_shoe/ui.rb
ADDED
File without changes
|
data/spec/base_spec.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe DogShoe::Table do

  # EgTable comes from spec/models/eg_table.rb.
  # NOTE(review): presumably it serves a local HTML fixture instead of
  # hitting the URL below -- confirm against that file.
  it 'hydrates an object' do
    results = EgTable.find(
      url: 'http://example.com',
      css: '.example'
    )

    expect(results.first[0]).to eq('1st')
    expect(results.first[1]).to eq('2nd')
    expect(results.first[2]).to eq('3rd')

    expect(results[1][0]).to eq('2-1st')

    expect(results[2][0]).to eq('3-1st')
  end

  # No body yet: declared without a block so RSpec reports it as pending
  # instead of counting an empty example as a pass.
  it 'parses a table'

  it 'uses headers as attributes' do
    # The previous version built EgTable.new from a String, called the
    # class-level css on the instance and ended in an assignment, so it
    # asserted nothing.
    results = EgTable.find(
      url: 'http://example.com',
      css: '.example'
    )

    # NOTE(review): assumes the fixture's first header cell reads "first",
    # which makes `first` a dynamic attribute of each row -- confirm.
    expect(results.first.first).to eq('1st')
  end

end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
<html>
|
2
|
+
<body>
|
3
|
+
<table class="example">
|
4
|
+
<th>
|
5
|
+
|
6
|
+
</th>
|
7
|
+
<tr>
|
8
|
+
<td>1st</td>
|
9
|
+
<td>2nd</td>
|
10
|
+
<td>3rd</td>
|
11
|
+
<td>4th</td>
|
12
|
+
<td>5th</td>
|
13
|
+
</tr>
|
14
|
+
<tr>
|
15
|
+
<td>2-1st</td>
|
16
|
+
<td>2-2nd</td>
|
17
|
+
<td>2-3rd</td>
|
18
|
+
<td>2-4th</td>
|
19
|
+
<td>2-5th</td>
|
20
|
+
</tr>
|
21
|
+
<tr>
|
22
|
+
<td>3-1st</td>
|
23
|
+
<td>3-2nd</td>
|
24
|
+
<td>3-3rd</td>
|
25
|
+
<td>3-4th</td>
|
26
|
+
<td>3-5th</td>
|
27
|
+
</tr>
|
28
|
+
|
29
|
+
</table>
|
30
|
+
</body>
|
31
|
+
</html>
|
@@ -0,0 +1,39 @@
|
|
1
|
+
<html>
|
2
|
+
<body>
|
3
|
+
<table class="example">
|
4
|
+
<tr>
|
5
|
+
<th>first</th>
|
6
|
+
<th>second</th>
|
7
|
+
<th>first</th>
|
8
|
+
<th>first</th>
|
9
|
+
<th>first</th>
|
10
|
+
</tr>
|
11
|
+
<tr>
|
12
|
+
<td>1st</td>
|
13
|
+
<td>2nd</td>
|
14
|
+
<td>3rd</td>
|
15
|
+
<td>4th</td>
|
16
|
+
<td>5th</td>
|
17
|
+
</tr>
|
18
|
+
<tr>
|
19
|
+
<td>2-1st</td>
|
20
|
+
<td>2-2nd</td>
|
21
|
+
<td>2-3rd</td>
|
22
|
+
<td>2-4th</td>
|
23
|
+
<td>2-5th</td>
|
24
|
+
</tr>
|
25
|
+
<tr>
|
26
|
+
<td>3-1st</td>
|
27
|
+
<td>3-2nd</td>
|
28
|
+
<td>3-3rd</td>
|
29
|
+
<td>3-4th</td>
|
30
|
+
<td>3-5th</td>
|
31
|
+
</tr>
|
32
|
+
</table>
|
33
|
+
<ul>
|
34
|
+
<li>one</li>
|
35
|
+
<li>two</li>
|
36
|
+
<li>three</li>
|
37
|
+
</ul>
|
38
|
+
</body>
|
39
|
+
</html>
|
data/spec/spec_helper.rb
ADDED
data/spec/table_spec.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe DogShoe::Table do

  # NOTE(review): this example requests a live third-party page; nothing in
  # this spec stubs the request. Consider pointing it at a local fixture.
  it 'finds a model' do
    results = DogShoe::Table.find(
      url: 'http://www.calottery.com/play/scratchers-games/$30-scratchers/30th-anniversary-1183',
      css: '.draw_games'
    )

    # Previously the example only evaluated `results.first` and asserted
    # nothing. Checking the type of each hydrated row holds regardless of
    # how many rows the live page currently has.
    results.each { |row| expect(row).to be_a(DogShoe::Table) }
  end

  # No body yet: declared without a block so RSpec reports it as pending
  # instead of counting an empty example as a pass.
  it 'parses a table'

  it 'uses headers as attributes' do
    # The previous version built EgTable.new from a String, called the
    # class-level css on the instance and ended in an assignment, so it
    # asserted nothing.
    results = EgTable.find(
      url: 'http://example.com',
      css: '.example'
    )

    # NOTE(review): assumes the fixture's first header cell reads "first",
    # which makes `first` a dynamic attribute of each row -- confirm.
    expect(results.first.first).to eq('1st')
  end

end
|
metadata
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: dogshoe
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Trevor Grayson
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-10-17 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 2.0.0
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 2.0.0
|
41
|
+
description: Modeling backed with scraped website data
|
42
|
+
email: trevor@ipsumllc.com
|
43
|
+
executables: []
|
44
|
+
extensions: []
|
45
|
+
extra_rdoc_files: []
|
46
|
+
files:
|
47
|
+
- README.md
|
48
|
+
- lib/dog_shoe.rb
|
49
|
+
- lib/dog_shoe/base.rb
|
50
|
+
- lib/dog_shoe/table.rb
|
51
|
+
- lib/dog_shoe/ui.rb
|
52
|
+
- lib/dog_shoe/version.rb
|
53
|
+
- spec/base_spec.rb
|
54
|
+
- spec/html/table-with-headers.html
|
55
|
+
- spec/html/table.html
|
56
|
+
- spec/models/eg_table.rb
|
57
|
+
- spec/spec_helper.rb
|
58
|
+
- spec/table_spec.rb
|
59
|
+
homepage: http://github.com/trevorgrayson/dogshoe
|
60
|
+
licenses: []
|
61
|
+
metadata: {}
|
62
|
+
post_install_message:
|
63
|
+
rdoc_options: []
|
64
|
+
require_paths:
|
65
|
+
- lib
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: '0'
|
71
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
requirements: []
|
77
|
+
rubyforge_project:
|
78
|
+
rubygems_version: 2.4.5.1
|
79
|
+
signing_key:
|
80
|
+
specification_version: 4
|
81
|
+
summary: ORM backed by scraped websites
|
82
|
+
test_files: []
|