klepto 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. data/.gitignore +21 -0
  2. data/.rspec +2 -0
  3. data/.rvmrc +1 -0
  4. data/Gemfile +18 -0
  5. data/Guardfile +11 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +129 -0
  8. data/Rakefile +7 -0
  9. data/klepto.gemspec +26 -0
  10. data/lib/klepto.rb +26 -0
  11. data/lib/klepto/bot.rb +59 -0
  12. data/lib/klepto/browser.rb +18 -0
  13. data/lib/klepto/crawler.rb +72 -0
  14. data/lib/klepto/tasks.rb +15 -0
  15. data/lib/klepto/version.rb +3 -0
  16. data/samples/example.rb +49 -0
  17. data/spec/cassettes/Klepto_Crawler/dsl_interaction/should_crawl_the_resource.yml +1960 -0
  18. data/spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_designate_scraping_of_a_set_of_nodes.yml +114 -0
  19. data/spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_designate_scraping_of_a_single_node_with_a_block.yml +114 -0
  20. data/spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_designate_scraping_of_a_single_node_with_a_symbol.yml +114 -0
  21. data/spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_scrape_the_node_that_the_crawler_is_scoped_to.yml +114 -0
  22. data/spec/cassettes/Klepto_Crawler/standard_interaction/should_have_a_CSS_scope.yml +114 -0
  23. data/spec/cassettes/Klepto_Crawler/standard_interaction/should_have_a_desired_syntax.yml +114 -0
  24. data/spec/lib/klepto/bot_spec.rb +40 -0
  25. data/spec/lib/klepto/browser_spec.rb +15 -0
  26. data/spec/lib/klepto/crawler_spec.rb +88 -0
  27. data/spec/lib/klepto/dsl_spec.rb +6 -0
  28. data/spec/lib/klepto_spec.rb +64 -0
  29. data/spec/orm/active_record.rb +36 -0
  30. data/spec/orm/database.example.yml +15 -0
  31. data/spec/spec_helper.rb +32 -0
  32. metadata +157 -0
data/spec/orm/active_record.rb ADDED
@@ -0,0 +1,36 @@
1
+ # encoding: utf-8
2
+ require 'active_record'
3
+
4
+ ActiveRecord::Base.logger = ActiveSupport::BufferedLogger.new('log/test.log')
5
+ ActiveRecord::Base.establish_connection YAML.load(File.open(File.join(File.dirname(__FILE__), 'database.yml')).read)[ENV['db'] || 'mysql']
6
+
7
+ ActiveRecord::Migration.verbose = false
8
+
9
+ class TestMigration < ActiveRecord::Migration
10
+ def self.up
11
+ create_table :tweets, :force => true do |t|
12
+ t.string :content
13
+ t.string :twitter_id
14
+ t.integer :timestamp
15
+ t.string :permalink
16
+ end
17
+
18
+ create_table :users, :force => true do |t|
19
+ t.string :name
20
+ t.string :username
21
+ end
22
+ end
23
+
24
+ def self.down
25
+ drop_table :tweets
26
+ drop_table :users
27
+ end
28
+ end
29
+
30
+ class Tweet < ActiveRecord::Base
31
+ validates_presence_of :timestamp, :twitter_id, :permalink, :content
32
+ end
33
+
34
+ class User < ActiveRecord::Base
35
+ validates_presence_of :username, :name
36
+ end
data/spec/orm/database.example.yml ADDED
@@ -0,0 +1,15 @@
1
+ sqlite3:
2
+ adapter: sqlite3
3
+ database: ":memory:"
4
+ postgresql:
5
+ adapter: postgresql
6
+ database: klepto_test
7
+ username: klepto
8
+ password: klepto
9
+ min_messages: WARNING
10
+ mysql:
11
+ adapter: mysql
12
+ host: localhost
13
+ database: klepto_test
14
+ username: klepto
15
+ password: klepto
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,32 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+ require 'debugger'
4
+ require 'simplecov'
5
+ SimpleCov.start do
6
+ add_filter "spec"
7
+ end
8
+
9
+ require 'klepto'
10
+ require 'vcr'
11
+ require 'orm/active_record'
12
+
13
+ def page(url="http://example.com")
14
+ Capybara::Node::Simple.new(open(url).read)
15
+ end
16
+
17
+ VCR.configure do |c|
18
+ c.cassette_library_dir = 'spec/cassettes'
19
+ c.hook_into :fakeweb
20
+ c.configure_rspec_metadata!
21
+ end
22
+
23
+ RSpec.configure do |config|
24
+ config.before(:all) { TestMigration.up }
25
+ config.after(:each){
26
+ User.delete_all
27
+ Tweet.delete_all
28
+ }
29
+ config.after(:all) { TestMigration.down }
30
+ config.treat_symbols_as_metadata_keys_with_true_values = true
31
+ config.filter_run_excluding :skip => true
32
+ end
metadata ADDED
@@ -0,0 +1,157 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: klepto
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Cory O'Daniel
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-04-10 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: docile
16
+ requirement: &70348289181900 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70348289181900
25
+ - !ruby/object:Gem::Dependency
26
+ name: poltergeist
27
+ requirement: &70348289181040 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - =
31
+ - !ruby/object:Gem::Version
32
+ version: 1.1.0
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *70348289181040
36
+ - !ruby/object:Gem::Dependency
37
+ name: capybara
38
+ requirement: &70348289179880 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - =
42
+ - !ruby/object:Gem::Version
43
+ version: 2.0.2
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: *70348289179880
47
+ - !ruby/object:Gem::Dependency
48
+ name: nokogiri
49
+ requirement: &70348289179180 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 1.5.6
55
+ type: :runtime
56
+ prerelease: false
57
+ version_requirements: *70348289179180
58
+ - !ruby/object:Gem::Dependency
59
+ name: activesupport
60
+ requirement: &70348289178640 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ type: :runtime
67
+ prerelease: false
68
+ version_requirements: *70348289178640
69
+ - !ruby/object:Gem::Dependency
70
+ name: multi_json
71
+ requirement: &70348289177300 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ~>
75
+ - !ruby/object:Gem::Version
76
+ version: '1.0'
77
+ type: :runtime
78
+ prerelease: false
79
+ version_requirements: *70348289177300
80
+ description: Tearing up web pages into ActiveRecord resources
81
+ email:
82
+ - github@coryodaniel.com
83
+ executables: []
84
+ extensions: []
85
+ extra_rdoc_files: []
86
+ files:
87
+ - .gitignore
88
+ - .rspec
89
+ - .rvmrc
90
+ - Gemfile
91
+ - Guardfile
92
+ - LICENSE.txt
93
+ - README.md
94
+ - Rakefile
95
+ - klepto.gemspec
96
+ - lib/klepto.rb
97
+ - lib/klepto/bot.rb
98
+ - lib/klepto/browser.rb
99
+ - lib/klepto/crawler.rb
100
+ - lib/klepto/tasks.rb
101
+ - lib/klepto/version.rb
102
+ - samples/example.rb
103
+ - spec/cassettes/Klepto_Crawler/dsl_interaction/should_crawl_the_resource.yml
104
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_designate_scraping_of_a_set_of_nodes.yml
105
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_designate_scraping_of_a_single_node_with_a_block.yml
106
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_designate_scraping_of_a_single_node_with_a_symbol.yml
107
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_scrape_the_node_that_the_crawler_is_scoped_to.yml
108
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_have_a_CSS_scope.yml
109
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_have_a_desired_syntax.yml
110
+ - spec/lib/klepto/bot_spec.rb
111
+ - spec/lib/klepto/browser_spec.rb
112
+ - spec/lib/klepto/crawler_spec.rb
113
+ - spec/lib/klepto/dsl_spec.rb
114
+ - spec/lib/klepto_spec.rb
115
+ - spec/orm/active_record.rb
116
+ - spec/orm/database.example.yml
117
+ - spec/spec_helper.rb
118
+ homepage: http://github.com/coryodaniel/klepto
119
+ licenses: []
120
+ post_install_message:
121
+ rdoc_options: []
122
+ require_paths:
123
+ - lib
124
+ required_ruby_version: !ruby/object:Gem::Requirement
125
+ none: false
126
+ requirements:
127
+ - - ! '>='
128
+ - !ruby/object:Gem::Version
129
+ version: '0'
130
+ required_rubygems_version: !ruby/object:Gem::Requirement
131
+ none: false
132
+ requirements:
133
+ - - ! '>='
134
+ - !ruby/object:Gem::Version
135
+ version: '0'
136
+ requirements: []
137
+ rubyforge_project:
138
+ rubygems_version: 1.8.10
139
+ signing_key:
140
+ specification_version: 3
141
+ summary: Tearing up web pages into ActiveRecord resources
142
+ test_files:
143
+ - spec/cassettes/Klepto_Crawler/dsl_interaction/should_crawl_the_resource.yml
144
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_designate_scraping_of_a_set_of_nodes.yml
145
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_designate_scraping_of_a_single_node_with_a_block.yml
146
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_designate_scraping_of_a_single_node_with_a_symbol.yml
147
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_scrape_the_node_that_the_crawler_is_scoped_to.yml
148
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_have_a_CSS_scope.yml
149
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_have_a_desired_syntax.yml
150
+ - spec/lib/klepto/bot_spec.rb
151
+ - spec/lib/klepto/browser_spec.rb
152
+ - spec/lib/klepto/crawler_spec.rb
153
+ - spec/lib/klepto/dsl_spec.rb
154
+ - spec/lib/klepto_spec.rb
155
+ - spec/orm/active_record.rb
156
+ - spec/orm/database.example.yml
157
+ - spec/spec_helper.rb