klepto 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (32) hide show
  1. data/.gitignore +21 -0
  2. data/.rspec +2 -0
  3. data/.rvmrc +1 -0
  4. data/Gemfile +18 -0
  5. data/Guardfile +11 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +129 -0
  8. data/Rakefile +7 -0
  9. data/klepto.gemspec +26 -0
  10. data/lib/klepto.rb +26 -0
  11. data/lib/klepto/bot.rb +59 -0
  12. data/lib/klepto/browser.rb +18 -0
  13. data/lib/klepto/crawler.rb +72 -0
  14. data/lib/klepto/tasks.rb +15 -0
  15. data/lib/klepto/version.rb +3 -0
  16. data/samples/example.rb +49 -0
  17. data/spec/cassettes/Klepto_Crawler/dsl_interaction/should_crawl_the_resource.yml +1960 -0
  18. data/spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_designate_scraping_of_a_set_of_nodes.yml +114 -0
  19. data/spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_designate_scraping_of_a_single_node_with_a_block.yml +114 -0
  20. data/spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_designate_scraping_of_a_single_node_with_a_symbol.yml +114 -0
  21. data/spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_scrape_the_node_that_the_crawler_is_scoped_to.yml +114 -0
  22. data/spec/cassettes/Klepto_Crawler/standard_interaction/should_have_a_CSS_scope.yml +114 -0
  23. data/spec/cassettes/Klepto_Crawler/standard_interaction/should_have_a_desired_syntax.yml +114 -0
  24. data/spec/lib/klepto/bot_spec.rb +40 -0
  25. data/spec/lib/klepto/browser_spec.rb +15 -0
  26. data/spec/lib/klepto/crawler_spec.rb +88 -0
  27. data/spec/lib/klepto/dsl_spec.rb +6 -0
  28. data/spec/lib/klepto_spec.rb +64 -0
  29. data/spec/orm/active_record.rb +36 -0
  30. data/spec/orm/database.example.yml +15 -0
  31. data/spec/spec_helper.rb +32 -0
  32. metadata +157 -0
@@ -0,0 +1,36 @@
1
+ # encoding: utf-8
2
+ require 'active_record'
3
+
4
+ ActiveRecord::Base.logger = ActiveSupport::BufferedLogger.new('log/test.log')
5
+ ActiveRecord::Base.establish_connection YAML.load(File.open(File.join(File.dirname(__FILE__), 'database.yml')).read)[ENV['db'] || 'mysql']
6
+
7
+ ActiveRecord::Migration.verbose = false
8
+
9
+ class TestMigration < ActiveRecord::Migration
10
+ def self.up
11
+ create_table :tweets, :force => true do |t|
12
+ t.string :content
13
+ t.string :twitter_id
14
+ t.integer :timestamp
15
+ t.string :permalink
16
+ end
17
+
18
+ create_table :users, :force => true do |t|
19
+ t.string :name
20
+ t.string :username
21
+ end
22
+ end
23
+
24
+ def self.down
25
+ drop_table :tweets
26
+ drop_table :users
27
+ end
28
+ end
29
+
30
+ class Tweet < ActiveRecord::Base
31
+ validates_presence_of :timestamp, :twitter_id, :permalink, :content
32
+ end
33
+
34
+ class User < ActiveRecord::Base
35
+ validates_presence_of :username, :name
36
+ end
@@ -0,0 +1,15 @@
1
+ sqlite3:
2
+ adapter: sqlite3
3
+ database: ":memory:"
4
+ postgresql:
5
+ adapter: postgresql
6
+ database: klepto_test
7
+ username: klepto
8
+ password: klepto
9
+ min_messages: WARNING
10
+ mysql:
11
+ adapter: mysql
12
+ host: localhost
13
+ database: klepto_test
14
+ username: klepto
15
+ password: klepto
@@ -0,0 +1,32 @@
1
+ require 'rubygems'
2
+ require 'bundler/setup'
3
+ require 'debugger'
4
+ require 'simplecov'
5
+ SimpleCov.start do
6
+ add_filter "spec"
7
+ end
8
+
9
+ require 'klepto'
10
+ require 'vcr'
11
+ require 'orm/active_record'
12
+
13
+ def page(url="http://example.com")
14
+ Capybara::Node::Simple.new(open(url).read)
15
+ end
16
+
17
+ VCR.configure do |c|
18
+ c.cassette_library_dir = 'spec/cassettes'
19
+ c.hook_into :fakeweb
20
+ c.configure_rspec_metadata!
21
+ end
22
+
23
+ RSpec.configure do |config|
24
+ config.before(:all) { TestMigration.up }
25
+ config.after(:each){
26
+ User.delete_all
27
+ Tweet.delete_all
28
+ }
29
+ config.after(:all) { TestMigration.down }
30
+ config.treat_symbols_as_metadata_keys_with_true_values = true
31
+ config.filter_run_excluding :skip => true
32
+ end
metadata ADDED
@@ -0,0 +1,157 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: klepto
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Cory O'Daniel
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-04-10 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: docile
16
+ requirement: &70348289181900 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70348289181900
25
+ - !ruby/object:Gem::Dependency
26
+ name: poltergeist
27
+ requirement: &70348289181040 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - =
31
+ - !ruby/object:Gem::Version
32
+ version: 1.1.0
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *70348289181040
36
+ - !ruby/object:Gem::Dependency
37
+ name: capybara
38
+ requirement: &70348289179880 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - =
42
+ - !ruby/object:Gem::Version
43
+ version: 2.0.2
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: *70348289179880
47
+ - !ruby/object:Gem::Dependency
48
+ name: nokogiri
49
+ requirement: &70348289179180 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 1.5.6
55
+ type: :runtime
56
+ prerelease: false
57
+ version_requirements: *70348289179180
58
+ - !ruby/object:Gem::Dependency
59
+ name: activesupport
60
+ requirement: &70348289178640 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ type: :runtime
67
+ prerelease: false
68
+ version_requirements: *70348289178640
69
+ - !ruby/object:Gem::Dependency
70
+ name: multi_json
71
+ requirement: &70348289177300 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ~>
75
+ - !ruby/object:Gem::Version
76
+ version: '1.0'
77
+ type: :runtime
78
+ prerelease: false
79
+ version_requirements: *70348289177300
80
+ description: Tearing up web pages into ActiveRecord resources
81
+ email:
82
+ - github@coryodaniel.com
83
+ executables: []
84
+ extensions: []
85
+ extra_rdoc_files: []
86
+ files:
87
+ - .gitignore
88
+ - .rspec
89
+ - .rvmrc
90
+ - Gemfile
91
+ - Guardfile
92
+ - LICENSE.txt
93
+ - README.md
94
+ - Rakefile
95
+ - klepto.gemspec
96
+ - lib/klepto.rb
97
+ - lib/klepto/bot.rb
98
+ - lib/klepto/browser.rb
99
+ - lib/klepto/crawler.rb
100
+ - lib/klepto/tasks.rb
101
+ - lib/klepto/version.rb
102
+ - samples/example.rb
103
+ - spec/cassettes/Klepto_Crawler/dsl_interaction/should_crawl_the_resource.yml
104
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_designate_scraping_of_a_set_of_nodes.yml
105
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_designate_scraping_of_a_single_node_with_a_block.yml
106
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_designate_scraping_of_a_single_node_with_a_symbol.yml
107
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_scrape_the_node_that_the_crawler_is_scoped_to.yml
108
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_have_a_CSS_scope.yml
109
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_have_a_desired_syntax.yml
110
+ - spec/lib/klepto/bot_spec.rb
111
+ - spec/lib/klepto/browser_spec.rb
112
+ - spec/lib/klepto/crawler_spec.rb
113
+ - spec/lib/klepto/dsl_spec.rb
114
+ - spec/lib/klepto_spec.rb
115
+ - spec/orm/active_record.rb
116
+ - spec/orm/database.example.yml
117
+ - spec/spec_helper.rb
118
+ homepage: http://github.com/coryodaniel/klepto
119
+ licenses: []
120
+ post_install_message:
121
+ rdoc_options: []
122
+ require_paths:
123
+ - lib
124
+ required_ruby_version: !ruby/object:Gem::Requirement
125
+ none: false
126
+ requirements:
127
+ - - ! '>='
128
+ - !ruby/object:Gem::Version
129
+ version: '0'
130
+ required_rubygems_version: !ruby/object:Gem::Requirement
131
+ none: false
132
+ requirements:
133
+ - - ! '>='
134
+ - !ruby/object:Gem::Version
135
+ version: '0'
136
+ requirements: []
137
+ rubyforge_project:
138
+ rubygems_version: 1.8.10
139
+ signing_key:
140
+ specification_version: 3
141
+ summary: Tearing up web pages into ActiveRecord resources
142
+ test_files:
143
+ - spec/cassettes/Klepto_Crawler/dsl_interaction/should_crawl_the_resource.yml
144
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_designate_scraping_of_a_set_of_nodes.yml
145
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_designate_scraping_of_a_single_node_with_a_block.yml
146
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_designate_scraping_of_a_single_node_with_a_symbol.yml
147
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_be_able_to_scrape_the_node_that_the_crawler_is_scoped_to.yml
148
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_have_a_CSS_scope.yml
149
+ - spec/cassettes/Klepto_Crawler/standard_interaction/should_have_a_desired_syntax.yml
150
+ - spec/lib/klepto/bot_spec.rb
151
+ - spec/lib/klepto/browser_spec.rb
152
+ - spec/lib/klepto/crawler_spec.rb
153
+ - spec/lib/klepto/dsl_spec.rb
154
+ - spec/lib/klepto_spec.rb
155
+ - spec/orm/active_record.rb
156
+ - spec/orm/database.example.yml
157
+ - spec/spec_helper.rb