dogshoe 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 073f3b5eeebc94df2ba0b816f19b8e48ada3fb34
4
+ data.tar.gz: 7616865d53111a61e6476833ff325e4f94e762f4
5
+ SHA512:
6
+ metadata.gz: 2ebfe357c0787a5fed3d6f738a3274c3b2aa164470e681da18c0abe7f3654d5b4ca2500a8c4dded6bbf805abd804263174584b2283481035862a5b7dd6a9c20a
7
+ data.tar.gz: f107343ec884c130b1784ab3c01981350f56e69be22b2a24145d3abe43e3536ad377958da3cd78bb31ee63d4e07b041be9e4e0b5c0bcd42f22850a6549aad4ee
data/README.md ADDED
@@ -0,0 +1,30 @@
1
+ # dogshoe
2
+ Ruby ORM backed by scraped websites
3
+
4
+ Say we are given the following HTML on anyotherwebpage.com:
5
+
6
+ <table class="some_class">
7
+ <tr>
8
+ <th>Some Thing</th><th>Some Thing Else</th>
9
+ </tr>
10
+ <tr>
11
+ <td>first value</td><td>1000</td>
12
+ </tr>
13
+ <tr>
14
+ <td>second value</td><td>2000</td>
15
+ </tr>
16
+ </table>
17
+
18
+ Let's stop being polite, and start modeling!
19
+
20
+ things = DogShoe::Table.find(
21
+ url: 'http://anyotherwebpage.com',
22
+ css: '.some_class'
23
+ )
24
+
25
+ => [#<DogShoe::Table>, #<DogShoe::Table>]
26
+
27
+
28
+ things.first.some_thing
29
+
30
+ => 'first value'
data/lib/dog_shoe.rb ADDED
@@ -0,0 +1,4 @@
1
+ #require 'dog_shoe/dog_shoe'
2
+ require 'dog_shoe/base'
3
+ require 'dog_shoe/table'
4
+ require 'dog_shoe/ui'
@@ -0,0 +1,50 @@
1
+ require 'open-uri'
2
+ require 'nokogiri'
3
+
4
+ module DogShoe
5
+ class Base
6
+
7
+ attr_reader :doc
8
+
9
+ def initialize(attributes = {})
10
+ @values = attributes[:values]
11
+ @keys = attributes[:keys].collect{|k| k.to_sym}
12
+ end
13
+
14
+ def [](key)
15
+ @values[key]
16
+ end
17
+
18
+ class << self
19
+ def parse(body)
20
+ Nokogiri::HTML(body)
21
+ end
22
+
23
+ def fetch(uri)
24
+ open(uri)
25
+ end
26
+
27
+ def css(selector)
28
+ @doc.css(selector)
29
+ end
30
+
31
+ def values
32
+ @values
33
+ end
34
+
35
+ def find(attrs)
36
+ @doc = parse(fetch(attrs[:url]))
37
+ results = css(attrs[:css])
38
+ #self.class.new(results)
39
+ results
40
+ end
41
+
42
+ end
43
+
44
+ def method_missing(name, *args, &block)
45
+ super unless @keys.include? name
46
+ @values[@keys.index(name)]
47
+ end
48
+
49
+ end
50
+ end
@@ -0,0 +1,48 @@
1
module DogShoe
  # Model whose records are the data rows of an HTML table: header cells
  # (<th>) provide the attribute names and every row that contains <td> cells
  # becomes one record.
  class Table < Base
    class << self
      # Downloads attrs[:url], locates the table with attrs[:css] and builds
      # one instance per data row.
      #
      # @param attrs [Hash] requires :url and :css
      # @return [Array<DogShoe::Table>] one record per row, in document order
      def find(attrs)
        @doc = parse(fetch(attrs[:url]))
        rows = css(attrs[:css])
        rows.map { |row| new(values: row, keys: @keys) }
      end

      # Selects the table node(s) and converts them to rows of cell values.
      #
      # @param selector [String] CSS selector for the table
      # @param args [Array<Symbol>] options forwarded to .table_to_magic
      # @return [Array<Array>] rows of cell values
      def css(selector, args = [])
        table_to_magic(super(selector), *args)
      end

      # Extracts header names and row values from +table+, storing them in
      # the class-level @keys and @values.
      #
      # Rows without any <td> (for example a header-only row) are dropped.
      # When :numeric is given, every cell is reduced to its digits and
      # decimal point and converted to a Float.
      #
      # @param table the node(s) to read; must respond to #css
      # @param args [Array<Symbol>] pass :numeric to convert cells to Floats
      # @return [Array<Array>] the rows, also kept in @values
      def table_to_magic(table, *args)
        @keys = table.css('th').map { |th| header_to_key(th.text) }

        rows = table.css('tr').map { |tr| tr.css('td').map(&:text) }
        rows = rows.reject(&:empty?)
        rows = rows.map { |cells| cells.map { |cell| to_number(cell) } } if args.include?(:numeric)

        @values = rows
      end

      # Removes newlines and surrounding whitespace from +text+.
      def scrub(text)
        text.gsub("\n", '').gsub(/^\s*/, '').gsub(/\s*$/, '')
      end

      private

      # Turns header text into an attribute name: punctuation is dropped,
      # whitespace becomes underscores and the result is lower-cased.
      def header_to_key(text)
        scrub(text).gsub(/[^0-9a-zA-Z\s]/, '').gsub(/\s/, '_').downcase
      end

      # Keeps digits and the decimal point so that "12.5" stays 12.5 rather
      # than collapsing to 125.0; everything else (currency symbols, thousands
      # separators, signs) is discarded before conversion.
      def to_number(cell)
        cell.gsub(/[^0-9.]/, '').to_f
      end
    end
  end
end
File without changes
@@ -0,0 +1,3 @@
1
module DogShoe
  # Gem version string. Frozen so the constant cannot be mutated in place.
  VERSION = '0.1.0'.freeze
end
data/spec/base_spec.rb ADDED
@@ -0,0 +1,30 @@
1
+ require 'spec_helper'
2
+
3
describe DogShoe::Table do

  it 'hydrates an object' do
    # EgTable.fetch returns a local HTML fixture, so the URL is never requested.
    results = EgTable.find(
      url: 'http://example.com',
      css: '.example'
    )

    first_row = results.first
    expect(first_row[0]).to eq('1st')
    expect(first_row[1]).to eq('2nd')
    expect(first_row[2]).to eq('3rd')

    expect(results[1][0]).to eq('2-1st')

    expect(results[2][0]).to eq('3-1st')
  end

  it 'parses a table' do
    # No expectations yet.
  end

  it 'uses headers as attributes' do
    # Records are built through the class-level .find; the previous version
    # passed a String to .new, called .css on an instance, and assigned
    # instead of asserting.
    results = EgTable.find(
      url: 'http://example.com',
      css: '.example'
    )

    # NOTE(review): assumes the fixture's first header cell reads "first", so
    # that #first is the reader for column 0 — confirm against
    # spec/html/table.html.
    expect(results.first.first).to eq('1st')
  end

end
@@ -0,0 +1,31 @@
1
+ <html>
2
+ <body>
3
+ <table class="example">
4
+ <th>
5
+
6
+ </th>
7
+ <tr>
8
+ <td>1st</td>
9
+ <td>2nd</td>
10
+ <td>3rd</td>
11
+ <td>4th</td>
12
+ <td>5th</td>
13
+ </tr>
14
+ <tr>
15
+ <td>2-1st</td>
16
+ <td>2-2nd</td>
17
+ <td>2-3rd</td>
18
+ <td>2-4th</td>
19
+ <td>2-5th</td>
20
+ </tr>
21
+ <tr>
22
+ <td>3-1st</td>
23
+ <td>3-2nd</td>
24
+ <td>3-3rd</td>
25
+ <td>3-4th</td>
26
+ <td>3-5th</td>
27
+ </tr>
28
+
29
+ </table>
30
+ </body>
31
+ </html>
@@ -0,0 +1,39 @@
1
+ <html>
2
+ <body>
3
+ <table class="example">
4
+ <tr>
5
+ <th>first</th>
6
+ <th>second</th>
7
+ <th>first</th>
8
+ <th>first</th>
9
+ <th>first</th>
10
+ </tr>
11
+ <tr>
12
+ <td>1st</td>
13
+ <td>2nd</td>
14
+ <td>3rd</td>
15
+ <td>4th</td>
16
+ <td>5th</td>
17
+ </tr>
18
+ <tr>
19
+ <td>2-1st</td>
20
+ <td>2-2nd</td>
21
+ <td>2-3rd</td>
22
+ <td>2-4th</td>
23
+ <td>2-5th</td>
24
+ </tr>
25
+ <tr>
26
+ <td>3-1st</td>
27
+ <td>3-2nd</td>
28
+ <td>3-3rd</td>
29
+ <td>3-4th</td>
30
+ <td>3-5th</td>
31
+ </tr>
32
+ </table>
33
+ <ul>
34
+ <li>one</li>
35
+ <li>two</li>
36
+ <li>three</li>
37
+ </ul>
38
+ </body>
39
+ </html>
@@ -0,0 +1,7 @@
1
# Spec model: a DogShoe::Table whose page source is a local HTML fixture
# instead of a remote document.
class EgTable < DogShoe::Table
  class << self
    # Ignores the requested location and returns the fixture's contents.
    #
    # @return [String] the HTML read from ./spec/html/table.html
    def fetch(_)
      File.read('./spec/html/table.html')
    end
  end
end
@@ -0,0 +1,4 @@
1
+ require 'rspec'
2
+ require_relative '../lib/dog_shoe'
3
+
4
+ require 'models/eg_table'
@@ -0,0 +1,24 @@
1
+ require 'spec_helper'
2
+
3
describe DogShoe::Table do

  it 'finds a model' do
    # NOTE: DogShoe::Table.fetch is not stubbed here, so this example
    # requests the live page below.
    results = DogShoe::Table.find(
      url: 'http://www.calottery.com/play/scratchers-games/$30-scratchers/30th-anniversary-1183',
      css: '.draw_games'
    )

    results.first
  end

  it 'parses a table' do
    # No expectations yet.
  end

  it 'uses headers as attributes' do
    # Records are built through the class-level .find; the previous version
    # passed a String to .new, called .css on an instance, and assigned
    # instead of asserting.
    results = EgTable.find(
      url: 'http://example.com',
      css: '.example'
    )

    # NOTE(review): assumes the fixture's first header cell reads "first", so
    # that #first is the reader for column 0 — confirm against
    # spec/html/table.html.
    expect(results.first.first).to eq('1st')
  end

end
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dogshoe
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Trevor Grayson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-10-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 2.0.0
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 2.0.0
41
+ description: Modeling backed with scraped website data
42
+ email: trevor@ipsumllc.com
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files: []
46
+ files:
47
+ - README.md
48
+ - lib/dog_shoe.rb
49
+ - lib/dog_shoe/base.rb
50
+ - lib/dog_shoe/table.rb
51
+ - lib/dog_shoe/ui.rb
52
+ - lib/dog_shoe/version.rb
53
+ - spec/base_spec.rb
54
+ - spec/html/table-with-headers.html
55
+ - spec/html/table.html
56
+ - spec/models/eg_table.rb
57
+ - spec/spec_helper.rb
58
+ - spec/table_spec.rb
59
+ homepage: http://github.com/trevorgrayson/dogshoe
60
+ licenses: []
61
+ metadata: {}
62
+ post_install_message:
63
+ rdoc_options: []
64
+ require_paths:
65
+ - lib
66
+ required_ruby_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ requirements: []
77
+ rubyforge_project:
78
+ rubygems_version: 2.4.5.1
79
+ signing_key:
80
+ specification_version: 4
81
+ summary: ORM backed by scraped websites
82
+ test_files: []