dogshoe 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 073f3b5eeebc94df2ba0b816f19b8e48ada3fb34
4
+ data.tar.gz: 7616865d53111a61e6476833ff325e4f94e762f4
5
+ SHA512:
6
+ metadata.gz: 2ebfe357c0787a5fed3d6f738a3274c3b2aa164470e681da18c0abe7f3654d5b4ca2500a8c4dded6bbf805abd804263174584b2283481035862a5b7dd6a9c20a
7
+ data.tar.gz: f107343ec884c130b1784ab3c01981350f56e69be22b2a24145d3abe43e3536ad377958da3cd78bb31ee63d4e07b041be9e4e0b5c0bcd42f22850a6549aad4ee
data/README.md ADDED
@@ -0,0 +1,30 @@
1
+ # dogshoe
2
+ Ruby ORM backed by scraped websites
3
+
4
+ Say we are given the following HTML on anyotherwebpage.com
5
+
6
+ <table class="some_class">
7
+ <tr>
8
+ <th>Some Thing</th><th>Some Thing Else</th>
9
+ </tr>
10
+ <tr>
11
+ <td>first value</td><td>1000</td>
12
+ </tr>
13
+ <tr>
14
+ <td>second value</td><td>2000</td>
15
+ </tr>
16
+ </table>
17
+
18
+ Let's stop being polite, and start modeling!
19
+
20
+ things = DogShoe::Table.find(
21
+ url: 'http://anyotherwebpage.com',
22
+ css: '.some_class'
23
+ )
24
+
25
+ => [#<DogShoe::Table>, #<DogShoe::Table>]
26
+
27
+
28
+ things.first.some_thing
29
+
30
+ => 'first value'
data/lib/dog_shoe.rb ADDED
@@ -0,0 +1,4 @@
1
+ #require 'dog_shoe/dog_shoe'
2
+ require 'dog_shoe/base'
3
+ require 'dog_shoe/table'
4
+ require 'dog_shoe/ui'
@@ -0,0 +1,50 @@
1
+ require 'open-uri'
2
+ require 'nokogiri'
3
+
4
module DogShoe
  # Base class for models whose records are scraped from an HTML document.
  #
  # The class-level methods fetch a page, parse it and run CSS queries
  # against it; the parsed document is kept in a class-level instance
  # variable. An instance wraps a single record: a positional list of values
  # plus the names labelling each position, readable either by index
  # (record[0]) or by name (record.some_name).
  class Base
    # NOTE(review): nothing in this class assigns an instance-level @doc
    # (.find stores the parsed document on the class itself), so this reader
    # returns nil on instances. Kept so the public interface is unchanged.
    attr_reader :doc

    # Builds a record.
    #
    # @param attributes [Hash]
    # @option attributes [Array] :values positional values of the record
    # @option attributes [Array<#to_sym>] :keys name for each position
    def initialize(attributes = {})
      # Fall back to empty lists so that calling `new` with the declared
      # default argument yields an empty record instead of raising on nil.
      @values = attributes[:values] || []
      @keys = (attributes[:keys] || []).map(&:to_sym)
    end

    # @param key [Integer] position of the wanted value
    # @return [Object, nil] the value stored at that position
    def [](key)
      @values[key]
    end

    class << self
      # Parses an HTML body with Nokogiri.
      #
      # @param body [String, IO] raw HTML
      # @return [Nokogiri::HTML::Document]
      def parse(body)
        Nokogiri::HTML(body)
      end

      # Opens the resource behind +uri+ and returns the handle unread; the
      # handle is not closed here.
      #
      # Kernel#open stopped handling URLs in Ruby 3.0, so URI.open is used
      # whenever open-uri provides it; older Rubies fall back to Kernel#open.
      #
      # @param uri [String] address of the page to download
      # @return [IO] handle whose content is the response body
      def fetch(uri)
        URI.respond_to?(:open) ? URI.open(uri) : open(uri)
      end

      # Runs a CSS query against the document stored by the last .find.
      #
      # @param selector [String] CSS selector
      # @return [Nokogiri::XML::NodeSet] matching nodes
      def css(selector)
        @doc.css(selector)
      end

      # @return [Object, nil] the class-level @values; this class never
      #   assigns it, so it is nil unless a subclass sets it
      def values
        @values
      end

      # Downloads and parses attrs[:url], then queries it with attrs[:css].
      #
      # @param attrs [Hash] must provide :url and :css
      # @return [Nokogiri::XML::NodeSet] the raw query result
      def find(attrs)
        @doc = parse(fetch(attrs[:url]))
        css(attrs[:css])
      end
    end

    # Exposes every key as a reader method returning the value stored at the
    # same position. Names that are not keys are handed to the default
    # implementation, which raises NoMethodError.
    def method_missing(name, *args, &block)
      position = @keys.index(name)
      return super if position.nil?

      @values[position]
    end

    # Keeps respond_to? consistent with the readers served by method_missing.
    def respond_to_missing?(name, include_private = false)
      @keys.include?(name) || super
    end
  end
end
@@ -0,0 +1,48 @@
1
module DogShoe
  # Model backed by the rows of a scraped HTML <table>.
  #
  # Header cells (<th>) become attribute names and every row that contains
  # <td> cells becomes one record. The extracted keys and rows are kept in
  # class-level instance variables (@keys, @values) and are overwritten by
  # each query.
  class Table < Base
    class << self
      # Downloads attrs[:url], extracts the table matching attrs[:css] and
      # wraps each data row in a model instance.
      #
      # @param attrs [Hash] must provide :url and :css
      # @return [Array<DogShoe::Table>] one instance per data row
      def find(attrs)
        @doc = parse(fetch(attrs[:url]))
        # Use the rows returned by the query rather than reading @values
        # back as a side effect.
        rows = css(attrs[:css])
        rows.map { |row| new(values: row, keys: @keys) }
      end

      # Runs the CSS query and converts the matched table into rows.
      #
      # @param selector [String] CSS selector of the table
      # @param args [Array<Symbol>] options forwarded to table_to_magic
      # @return [Array<Array>] cell values, one inner array per row
      def css(selector, args = [])
        table_to_magic(super(selector), *args)
      end

      # Extracts header keys and row values from the matched nodes.
      #
      # @param table [Nokogiri::XML::NodeSet] result of the CSS query
      # @param args [Array<Symbol>] pass :numeric to turn every cell into a
      #   Float
      # @return [Array<Array>] the extracted rows (also stored in @values)
      def table_to_magic(table, *args)
        @keys = table.css('th').map { |header| header_to_key(header.text) }

        rows = table.css('tr').map { |tr| tr.css('td').map(&:text) }
        # Rows without <td> cells (e.g. header rows) carry no data.
        rows = rows.reject(&:empty?)

        if args.include?(:numeric)
          rows = rows.map { |cells| cells.map { |cell| to_number(cell) } }
        end
        #throw DogShoe::NotFound if nothing found
        @values = rows
      end

      # Removes newlines and surrounding whitespace from a piece of text.
      #
      # @param text [String]
      # @return [String]
      def scrub(text)
        text.delete("\n").strip
      end

      private

      # Turns header text into an attribute name: punctuation is dropped,
      # whitespace becomes underscores and the result is lower-cased.
      def header_to_key(text)
        scrub(text).gsub(/[^0-9a-zA-Z\s]/, '').gsub(/\s/, '_').downcase
      end

      # Parses the number embedded in a cell. The decimal point is kept so
      # that "10.5" becomes 10.5; stripping it would yield 105.0.
      def to_number(text)
        text.gsub(/[^0-9.]/, '').to_f
      end
    end
  end
end
File without changes
@@ -0,0 +1,3 @@
1
module DogShoe
  # Version of the gem. Frozen so the shared string cannot be mutated.
  VERSION = '0.1.0'.freeze
end
data/spec/base_spec.rb ADDED
@@ -0,0 +1,30 @@
1
+ require 'spec_helper'
2
+
3
describe DogShoe::Table do
  # EgTable overrides .fetch to read a local HTML fixture, so the examples
  # using it never touch the network.
  let(:results) do
    EgTable.find(
      url: 'http://example.com',
      css: '.example'
    )
  end

  it 'hydrates an object' do
    expect( results.first[0] ).to eq('1st')
    expect( results.first[1] ).to eq('2nd')
    expect( results.first[2] ).to eq('3rd')

    expect( results[1][0] ).to eq('2-1st')

    expect( results[2][0] ).to eq('3-1st')
  end

  it 'parses a table' do
    # The fixture holds three data rows of five cells each.
    expect( results.size ).to eq(3)
    expect( results.last[4] ).to eq('3-5th')
  end

  it 'uses headers as attributes' do
    # Inline markup keeps the header names under test visible right here.
    html = <<-HTML
      <table class="example">
        <tr><th>Some Thing</th><th>Some Thing Else</th></tr>
        <tr><td>first value</td><td>1000</td></tr>
      </table>
    HTML
    model = Class.new(DogShoe::Table)
    model.define_singleton_method(:fetch) { |_| html }

    row = model.find(url: 'http://example.com', css: '.example').first

    expect( row.some_thing ).to eq('first value')
    expect( row.some_thing_else ).to eq('1000')
  end
end
@@ -0,0 +1,31 @@
1
+ <html>
2
+ <body>
3
+ <table class="example">
4
+ <th>
5
+
6
+ </th>
7
+ <tr>
8
+ <td>1st</td>
9
+ <td>2nd</td>
10
+ <td>3rd</td>
11
+ <td>4th</td>
12
+ <td>5th</td>
13
+ </tr>
14
+ <tr>
15
+ <td>2-1st</td>
16
+ <td>2-2nd</td>
17
+ <td>2-3rd</td>
18
+ <td>2-4th</td>
19
+ <td>2-5th</td>
20
+ </tr>
21
+ <tr>
22
+ <td>3-1st</td>
23
+ <td>3-2nd</td>
24
+ <td>3-3rd</td>
25
+ <td>3-4th</td>
26
+ <td>3-5th</td>
27
+ </tr>
28
+
29
+ </table>
30
+ </body>
31
+ </html>
@@ -0,0 +1,39 @@
1
+ <html>
2
+ <body>
3
+ <table class="example">
4
+ <tr>
5
+ <th>first</th>
6
+ <th>second</th>
7
+ <th>first</th>
8
+ <th>first</th>
9
+ <th>first</th>
10
+ </tr>
11
+ <tr>
12
+ <td>1st</td>
13
+ <td>2nd</td>
14
+ <td>3rd</td>
15
+ <td>4th</td>
16
+ <td>5th</td>
17
+ </tr>
18
+ <tr>
19
+ <td>2-1st</td>
20
+ <td>2-2nd</td>
21
+ <td>2-3rd</td>
22
+ <td>2-4th</td>
23
+ <td>2-5th</td>
24
+ </tr>
25
+ <tr>
26
+ <td>3-1st</td>
27
+ <td>3-2nd</td>
28
+ <td>3-3rd</td>
29
+ <td>3-4th</td>
30
+ <td>3-5th</td>
31
+ </tr>
32
+ </table>
33
+ <ul>
34
+ <li>one</li>
35
+ <li>two</li>
36
+ <li>three</li>
37
+ </ul>
38
+ </body>
39
+ </html>
@@ -0,0 +1,7 @@
1
# Table model used by the specs. It overrides .fetch so that examples read a
# local HTML fixture instead of performing an HTTP request.
class EgTable < DogShoe::Table
  # Absolute path of the fixture, resolved from this file's location
  # (spec/models) so the specs do not depend on the working directory.
  FIXTURE = File.expand_path('../../html/table.html', __FILE__).freeze

  # @param _ [String] requested URL; ignored
  # @return [String] the fixture's HTML
  def self.fetch(_)
    File.read(FIXTURE)
  end
end
@@ -0,0 +1,4 @@
1
+ require 'rspec'
2
+ require_relative '../lib/dog_shoe'
3
+
4
+ require 'models/eg_table'
@@ -0,0 +1,24 @@
1
+ require 'spec_helper'
2
+
3
describe DogShoe::Table do
  # EgTable reads a local fixture instead of downloading a page, which keeps
  # these examples deterministic and runnable offline.
  let(:query) { { url: 'http://example.com', css: '.example' } }

  it 'finds a model' do
    results = EgTable.find(query)

    expect( results ).not_to be_empty
    expect( results.first ).to be_a(DogShoe::Table)
  end

  it 'parses a table' do
    # .find stores the parsed document that .css then queries.
    EgTable.find(query)
    rows = EgTable.css('.example', [:numeric])

    expect( rows.first ).to eq([1.0, 2.0, 3.0, 4.0, 5.0])
  end

  it 'uses headers as attributes' do
    # Punctuation is dropped and spaces become underscores in header names.
    html = <<-HTML
      <table class="example">
        <tr><th>Some Thing Else!</th></tr>
        <tr><td>1000</td></tr>
      </table>
    HTML
    model = Class.new(DogShoe::Table)
    model.define_singleton_method(:fetch) { |_| html }

    row = model.find(query).first

    expect( row.some_thing_else ).to eq('1000')
  end
end
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dogshoe
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Trevor Grayson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-10-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 2.0.0
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 2.0.0
41
+ description: Modeling backed with scraped website data
42
+ email: trevor@ipsumllc.com
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files: []
46
+ files:
47
+ - README.md
48
+ - lib/dog_shoe.rb
49
+ - lib/dog_shoe/base.rb
50
+ - lib/dog_shoe/table.rb
51
+ - lib/dog_shoe/ui.rb
52
+ - lib/dog_shoe/version.rb
53
+ - spec/base_spec.rb
54
+ - spec/html/table-with-headers.html
55
+ - spec/html/table.html
56
+ - spec/models/eg_table.rb
57
+ - spec/spec_helper.rb
58
+ - spec/table_spec.rb
59
+ homepage: http://github.com/trevorgrayson/dogshoe
60
+ licenses: []
61
+ metadata: {}
62
+ post_install_message:
63
+ rdoc_options: []
64
+ require_paths:
65
+ - lib
66
+ required_ruby_version: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ requirements: []
77
+ rubyforge_project:
78
+ rubygems_version: 2.4.5.1
79
+ signing_key:
80
+ specification_version: 4
81
+ summary: ORM backed by scraped websites
82
+ test_files: []