dogshoe 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +30 -0
- data/lib/dog_shoe.rb +4 -0
- data/lib/dog_shoe/base.rb +50 -0
- data/lib/dog_shoe/table.rb +48 -0
- data/lib/dog_shoe/ui.rb +0 -0
- data/lib/dog_shoe/version.rb +3 -0
- data/spec/base_spec.rb +30 -0
- data/spec/html/table-with-headers.html +31 -0
- data/spec/html/table.html +39 -0
- data/spec/models/eg_table.rb +7 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/table_spec.rb +24 -0
- metadata +82 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 073f3b5eeebc94df2ba0b816f19b8e48ada3fb34
|
|
4
|
+
data.tar.gz: 7616865d53111a61e6476833ff325e4f94e762f4
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 2ebfe357c0787a5fed3d6f738a3274c3b2aa164470e681da18c0abe7f3654d5b4ca2500a8c4dded6bbf805abd804263174584b2283481035862a5b7dd6a9c20a
|
|
7
|
+
data.tar.gz: f107343ec884c130b1784ab3c01981350f56e69be22b2a24145d3abe43e3536ad377958da3cd78bb31ee63d4e07b041be9e4e0b5c0bcd42f22850a6549aad4ee
|
data/README.md
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# dogshoe
|
|
2
|
+
Ruby ORM backed by scraped websites
|
|
3
|
+
|
|
4
|
+
Say we are given the following HTML on anyotherwebpage.com
|
|
5
|
+
|
|
6
|
+
<table class="some_class">
|
|
7
|
+
<tr>
|
|
8
|
+
<th>Some Thing</th><th>Some Thing Else</th>
|
|
9
|
+
</tr>
|
|
10
|
+
<tr>
|
|
11
|
+
<td>first value</td><td>1000</td>
|
|
12
|
+
</tr>
|
|
13
|
+
<tr>
|
|
14
|
+
<td>second value</td><td>2000</td>
|
|
15
|
+
</tr>
|
|
16
|
+
</table>
|
|
17
|
+
|
|
18
|
+
Let's stop being polite, and start modeling!
|
|
19
|
+
|
|
20
|
+
things = DogShoe::Table.find(
|
|
21
|
+
url: 'http://anyotherwebpage.com',
|
|
22
|
+
css: '.some_class'
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
=> [#<DogShoe::Table>, #<DogShoe::Table>]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
things.first.some_thing
|
|
29
|
+
|
|
30
|
+
=> 'first value'
|
data/lib/dog_shoe.rb
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
require 'open-uri'
|
|
2
|
+
require 'nokogiri'
|
|
3
|
+
|
|
4
|
+
module DogShoe
  # Base class for models hydrated from scraped HTML.
  #
  # An instance wraps one scraped record: an ordered list of values plus the
  # column names (keys). Each key is exposed as a dynamic reader method, and
  # values can also be read positionally through #[].
  #
  # The class-level helpers (parse, fetch, css, find) keep the parsed
  # document in a class-level instance variable, so each call to .find
  # replaces the document used by later .css calls on the same class.
  class Base

    attr_reader :doc

    # @param attributes [Hash] expects :values (ordered values of the record)
    #   and optionally :keys (column names, converted to symbols).
    def initialize(attributes = {})
      @values = attributes[:values]
      # Array() lets header-less records omit :keys instead of failing on nil.
      @keys = Array(attributes[:keys]).map(&:to_sym)
    end

    # Positional access to the record's values.
    #
    # @param key [Integer] index into the values list
    # @return [Object, nil] whatever is stored at that position
    def [](key)
      @values[key]
    end

    class << self
      # Parses an HTML body (String or IO) into a Nokogiri document.
      def parse(body)
        Nokogiri::HTML(body)
      end

      # Opens the given URI for reading via open-uri.
      #
      # URI.open is preferred: bare Kernel#open no longer opens URLs on
      # Ruby 3 and, on older versions, also interprets "|cmd" strings as a
      # command to run. The fallback keeps very old Rubies (which lack a
      # public URI.open) working as before.
      def fetch(uri)
        URI.respond_to?(:open) ? URI.open(uri) : open(uri)
      end

      # Runs a CSS selector against the document loaded by the last .find.
      def css(selector)
        @doc.css(selector)
      end

      # Values stored at class level by subclasses while parsing.
      def values
        @values
      end

      # Fetches attrs[:url], parses it, and returns the nodes matching
      # attrs[:css].
      #
      # @param attrs [Hash] :url to fetch and :css selector to apply
      # @return the result of .css for the selector (not model instances)
      def find(attrs)
        @doc = parse(fetch(attrs[:url]))
        css(attrs[:css])
      end

    end

    # Exposes each key as a reader returning the value at the same position.
    # Names that are not keys fall through to the default NoMethodError.
    def method_missing(name, *args, &block)
      index = @keys.index(name)
      return super if index.nil?

      @values[index]
    end

    # Keeps respond_to? / method(...) in sync with method_missing.
    def respond_to_missing?(name, include_private = false)
      @keys.include?(name) || super
    end

  end
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
module DogShoe
  # Model backed by the rows of an HTML <table>.
  #
  # Header cells (<th>) become the attribute names; every <tr> that contains
  # at least one <td> becomes one record.
  #
  # NOTE: parsed state (@doc, @keys, @values) lives on the class itself, so
  # each call to .find / .css overwrites the results of the previous call.
  class Table < Base

    class << self
      # Fetches attrs[:url], selects the table(s) matching attrs[:css] and
      # builds one instance per data row.
      #
      # @param attrs [Hash] :url to fetch and :css selector of the table
      # @return [Array<DogShoe::Table>] one instance per row that has <td> cells
      def find(attrs)
        @doc = parse(fetch(attrs[:url]))
        # css returns the parsed rows and sets @keys as a side effect.
        rows = css(attrs[:css])
        rows.map { |row| new(values: row, keys: @keys) }
      end

      # Selects nodes with the inherited .css and converts them to row data.
      #
      # @param selector [String] CSS selector of the table
      # @param args [Array<Symbol>] options forwarded to table_to_magic
      #   (currently only :numeric is recognised)
      # @return [Array<Array>] cell values, one inner array per row
      def css(selector, args=[])
        table = super(selector)
        table_to_magic(table, *args)
      end

      # Extracts header names and cell values from the selected table nodes.
      #
      # Stores the normalised header names in @keys and the rows in @values.
      # With :numeric in args, every cell is reduced to its digits, decimal
      # point and minus sign and converted with to_f (so "$1,000.50" becomes
      # 1000.5 rather than losing the decimal point).
      #
      # @return [Array<Array>] the rows that were stored in @values
      def table_to_magic(table, *args)
        @keys = table.css('th').map do |th|
          scrub(th.text).gsub(/[^0-9a-zA-Z\s]/, '').gsub(/\s/, '_').downcase
        end

        # Header-only rows yield no <td> cells and are dropped.
        @values = table.css('tr')
                       .map { |tr| tr.css('td').map(&:text) }
                       .reject(&:empty?)

        if args.include?(:numeric)
          @values = @values.map do |cells|
            cells.map { |cell| cell.gsub(/[^0-9.\-]/, '').to_f }
          end
        end
        #throw DogShoe::NotFound if nothing found
        @values
      end

      # Removes newlines and surrounding whitespace from header text.
      # \A and \z anchor to the whole string rather than to each line.
      def scrub(text)
        text.delete("\n").gsub(/\A\s+|\s+\z/, '')
      end
    end

  end
end
|
data/lib/dog_shoe/ui.rb
ADDED
|
File without changes
|
data/spec/base_spec.rb
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
describe DogShoe::Table do
|
|
4
|
+
|
|
5
|
+
it 'hydrates an object' do
|
|
6
|
+
#stub
|
|
7
|
+
results = EgTable.find(
|
|
8
|
+
url: 'http://example.com',
|
|
9
|
+
css: '.example'
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
expect( results.first[0] ).to eq('1st')
|
|
13
|
+
expect( results.first[1] ).to eq('2nd')
|
|
14
|
+
expect( results.first[2] ).to eq('3rd')
|
|
15
|
+
|
|
16
|
+
expect( results[1][0] ).to eq('2-1st')
|
|
17
|
+
|
|
18
|
+
expect( results[2][0] ).to eq('3-1st')
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
it 'parses a table' do
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
it 'uses headers as attributes' do
|
|
25
|
+
eg = EgTable.new('http://example.com')
|
|
26
|
+
results = eg.css('.example')
|
|
27
|
+
results.first.first = '1st'
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
<html>
|
|
2
|
+
<body>
|
|
3
|
+
<table class="example">
|
|
4
|
+
<th>
|
|
5
|
+
|
|
6
|
+
</th>
|
|
7
|
+
<tr>
|
|
8
|
+
<td>1st</td>
|
|
9
|
+
<td>2nd</td>
|
|
10
|
+
<td>3rd</td>
|
|
11
|
+
<td>4th</td>
|
|
12
|
+
<td>5th</td>
|
|
13
|
+
</tr>
|
|
14
|
+
<tr>
|
|
15
|
+
<td>2-1st</td>
|
|
16
|
+
<td>2-2nd</td>
|
|
17
|
+
<td>2-3rd</td>
|
|
18
|
+
<td>2-4th</td>
|
|
19
|
+
<td>2-5th</td>
|
|
20
|
+
</tr>
|
|
21
|
+
<tr>
|
|
22
|
+
<td>3-1st</td>
|
|
23
|
+
<td>3-2nd</td>
|
|
24
|
+
<td>3-3rd</td>
|
|
25
|
+
<td>3-4th</td>
|
|
26
|
+
<td>3-5th</td>
|
|
27
|
+
</tr>
|
|
28
|
+
|
|
29
|
+
</table>
|
|
30
|
+
</body>
|
|
31
|
+
</html>
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
<html>
|
|
2
|
+
<body>
|
|
3
|
+
<table class="example">
|
|
4
|
+
<tr>
|
|
5
|
+
<th>first</th>
|
|
6
|
+
<th>second</th>
|
|
7
|
+
<th>first</th>
|
|
8
|
+
<th>first</th>
|
|
9
|
+
<th>first</th>
|
|
10
|
+
</tr>
|
|
11
|
+
<tr>
|
|
12
|
+
<td>1st</td>
|
|
13
|
+
<td>2nd</td>
|
|
14
|
+
<td>3rd</td>
|
|
15
|
+
<td>4th</td>
|
|
16
|
+
<td>5th</td>
|
|
17
|
+
</tr>
|
|
18
|
+
<tr>
|
|
19
|
+
<td>2-1st</td>
|
|
20
|
+
<td>2-2nd</td>
|
|
21
|
+
<td>2-3rd</td>
|
|
22
|
+
<td>2-4th</td>
|
|
23
|
+
<td>2-5th</td>
|
|
24
|
+
</tr>
|
|
25
|
+
<tr>
|
|
26
|
+
<td>3-1st</td>
|
|
27
|
+
<td>3-2nd</td>
|
|
28
|
+
<td>3-3rd</td>
|
|
29
|
+
<td>3-4th</td>
|
|
30
|
+
<td>3-5th</td>
|
|
31
|
+
</tr>
|
|
32
|
+
</table>
|
|
33
|
+
<ul>
|
|
34
|
+
<li>one</li>
|
|
35
|
+
<li>two</li>
|
|
36
|
+
<li>three</li>
|
|
37
|
+
</ul>
|
|
38
|
+
</body>
|
|
39
|
+
</html>
|
data/spec/spec_helper.rb
ADDED
data/spec/table_spec.rb
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
describe DogShoe::Table do
|
|
4
|
+
|
|
5
|
+
it 'finds a model' do
|
|
6
|
+
#stub
|
|
7
|
+
results = DogShoe::Table.find(
|
|
8
|
+
url: 'http://www.calottery.com/play/scratchers-games/$30-scratchers/30th-anniversary-1183',
|
|
9
|
+
css: '.draw_games'
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
results.first
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
it 'parses a table' do
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
it 'uses headers as attributes' do
|
|
19
|
+
eg = EgTable.new('http://example.com')
|
|
20
|
+
results = eg.css('.example')
|
|
21
|
+
results.first.first = '1st'
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
end
|
metadata
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: dogshoe
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Trevor Grayson
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2015-10-17 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: rake
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - ">="
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '0'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - ">="
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: rspec
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ">="
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: 2.0.0
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - ">="
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: 2.0.0
|
|
41
|
+
description: Modeling backed with scraped website data
|
|
42
|
+
email: trevor@ipsumllc.com
|
|
43
|
+
executables: []
|
|
44
|
+
extensions: []
|
|
45
|
+
extra_rdoc_files: []
|
|
46
|
+
files:
|
|
47
|
+
- README.md
|
|
48
|
+
- lib/dog_shoe.rb
|
|
49
|
+
- lib/dog_shoe/base.rb
|
|
50
|
+
- lib/dog_shoe/table.rb
|
|
51
|
+
- lib/dog_shoe/ui.rb
|
|
52
|
+
- lib/dog_shoe/version.rb
|
|
53
|
+
- spec/base_spec.rb
|
|
54
|
+
- spec/html/table-with-headers.html
|
|
55
|
+
- spec/html/table.html
|
|
56
|
+
- spec/models/eg_table.rb
|
|
57
|
+
- spec/spec_helper.rb
|
|
58
|
+
- spec/table_spec.rb
|
|
59
|
+
homepage: http://github.com/trevorgrayson/dogshoe
|
|
60
|
+
licenses: []
|
|
61
|
+
metadata: {}
|
|
62
|
+
post_install_message:
|
|
63
|
+
rdoc_options: []
|
|
64
|
+
require_paths:
|
|
65
|
+
- lib
|
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
67
|
+
requirements:
|
|
68
|
+
- - ">="
|
|
69
|
+
- !ruby/object:Gem::Version
|
|
70
|
+
version: '0'
|
|
71
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - ">="
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: '0'
|
|
76
|
+
requirements: []
|
|
77
|
+
rubyforge_project:
|
|
78
|
+
rubygems_version: 2.4.5.1
|
|
79
|
+
signing_key:
|
|
80
|
+
specification_version: 4
|
|
81
|
+
summary: ORM backed by scraped websites
|
|
82
|
+
test_files: []
|