klepto 0.6.9 → 0.7.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
data/klepto.gemspec CHANGED
@@ -16,6 +16,7 @@ Gem::Specification.new do |gem|
16
16
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
17
17
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
18
  gem.require_paths = ["lib"]
19
+ gem.license = 'MIT'
19
20
 
20
21
  gem.add_dependency "poltergeist", '~> 1.3.0'
21
22
  gem.add_dependency "capybara", '~> 2.1.0'
@@ -57,10 +57,9 @@ module Klepto
57
57
  @url_to_structure = _url
58
58
  Klepto.logger.debug("Fetching #{@url_to_structure}")
59
59
 
60
- Capybara.using_driver use_driver do
61
- visit @url_to_structure
62
- page
63
- end
60
+ Capybara.current_driver = Capybara.javascript_driver = use_driver
61
+ visit @url_to_structure
62
+ page
64
63
  end
65
64
  end
66
65
  end
@@ -1,3 +1,3 @@
1
1
  module Klepto
2
- VERSION = "0.6.9"
2
+ VERSION = "0.7.0"
3
3
  end
data/phantom/test.js ADDED
@@ -0,0 +1,100 @@
1
+ /*
2
+ CoffeeScript
3
+ JS Lint
4
+ PhantomJS
5
+
6
+ Ruby 'configuration' gem
7
+ Ruby blocks -> Javascript -> Ruby OR Javascript post processors
8
+ Ruby blocks -> Assertion? Auto generate cucumbers? OR callbacks on node not found?
9
+ https://github.com/ariya/phantomjs/wiki/API-Reference-WebPage
10
+
11
+ Config.defaults {
12
+ on(200,'2xx', :redirect){}
13
+ on('4xx'){}
14
+ on('5xx'){}
15
+ on(:timeout){}
16
+ on(:abort){}
17
+ headers({})
18
+ cookies({})
19
+ agent "Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) Klepto/#{Klepto::Version} Safari/534.34"
20
+ }
21
+ Bot.new("http://google.com")do
22
+ config{
23
+ # merges with Defaults, creates a Configuration
24
+ url "http://google.com"
25
+ auto_structure false # stops it from running structure (@bot.process! will run it)
26
+ abort_on_failure true
27
+ agent "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22";
28
+
29
+ headers({})
30
+ cookies({})
31
+
32
+ on(200,'2xx', :redirect){}
33
+ on('4xx'){}
34
+ on('5xx'){}
35
+ on(:timeout){}
36
+ on(:abort){}
37
+
38
+ before(:get){}
39
+ after(:get){}
40
+ before(:structure){}
41
+ after(:structure){}
42
+ }
43
+
44
+ structure{
45
+ # Should yield against Proxy so method_missing and queueing isn't in Bot
46
+ }
47
+ end
48
+
49
+ */
50
+ var page = require('webpage').create(),
51
+ system = require('system'),
52
+ lt, pt, t, currentAddress, requestedAddress;
53
+
54
+ page.settings.userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22";
55
+ page.settings.loadImages = false;
56
+
57
+ page.onUrlChanged = function(targetUrl){
58
+ currentAddress = targetUrl;
59
+ console.log("Redirecting to: " + currentAddress);
60
+ }
61
+
62
+ page.onResourceReceived = function(resource) {
63
+ if (resource.stage === 'end' && resource.status == 200 && resource.url == currentAddress) {
64
+ lt = Date.now() - t;
65
+ console.log("Crawling: " + resource.url);
66
+ page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
67
+ var title = page.evaluate(function(){
68
+ return $("title").text();
69
+ });
70
+
71
+ pt = Date.now() - t;
72
+ var structure = JSON.stringify({
73
+ title: title,
74
+ _meta: {
75
+ loadTime: lt,
76
+ parseTime: pt,
77
+ redirectOccurred: (requestedAddress != resource.url),
78
+ requestedAddress: requestedAddress,
79
+ currentAddress: resource.url,
80
+ httpCode: resource.status
81
+ }
82
+ });
83
+ system.stdout.write(structure);
84
+
85
+ phantom.exit();
86
+ });
87
+ } else if(resource.stage === 'end' && resource.status != 200 && resource.url == currentAddress){
88
+ console.log("Oops: " + resource.status);
89
+ phantom.exit();
90
+ } else {/* NOOP*/}
91
+ }
92
+
93
+ if (system.args.length === 1) {
94
+ console.log('Usage: test.js <some URL>');
95
+ phantom.exit(1);
96
+ } else {
97
+ t = Date.now();
98
+ currentAddress = requestedAddress = system.args[1];
99
+ page.open(requestedAddress);
100
+ }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: klepto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.9
4
+ version: 0.7.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-07-10 00:00:00.000000000 Z
12
+ date: 2013-07-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: poltergeist
@@ -115,6 +115,7 @@ files:
115
115
  - lib/klepto/structure.rb
116
116
  - lib/klepto/tasks.rb
117
117
  - lib/klepto/version.rb
118
+ - phantom/test.js
118
119
  - samples/bieber.html
119
120
  - samples/concept.rb
120
121
  - spec/cassettes/Klepto_Crawler/dsl_interaction/should_crawl_the_resource.yml
@@ -133,7 +134,8 @@ files:
133
134
  - spec/orm/database.example.yml
134
135
  - spec/spec_helper.rb
135
136
  homepage: http://github.com/coryodaniel/klepto
136
- licenses: []
137
+ licenses:
138
+ - MIT
137
139
  post_install_message:
138
140
  rdoc_options: []
139
141
  require_paths: