klepto 0.6.9 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
data/klepto.gemspec CHANGED
@@ -16,6 +16,7 @@ Gem::Specification.new do |gem|
16
16
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
17
17
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
18
  gem.require_paths = ["lib"]
19
+ gem.license = 'MIT'
19
20
 
20
21
  gem.add_dependency "poltergeist", '~> 1.3.0'
21
22
  gem.add_dependency "capybara", '~> 2.1.0'
@@ -57,10 +57,9 @@ module Klepto
57
57
  @url_to_structure = _url
58
58
  Klepto.logger.debug("Fetching #{@url_to_structure}")
59
59
 
60
- Capybara.using_driver use_driver do
61
- visit @url_to_structure
62
- page
63
- end
60
+ Capybara.current_driver = Capybara.javascript_driver = use_driver
61
+ visit @url_to_structure
62
+ page
64
63
  end
65
64
  end
66
65
  end
@@ -1,3 +1,3 @@
1
1
  module Klepto
2
- VERSION = "0.6.9"
2
+ VERSION = "0.7.0"
3
3
  end
data/phantom/test.js ADDED
@@ -0,0 +1,100 @@
1
+ /*
2
+ CoffeeScript
3
+ JS Lint
4
+ PhantomJS
5
+
6
+ Ruby 'configuration' gem
7
+ Ruby blocks -> Javascript -> Ruby OR Javascript post processors
8
+ Ruby blocks -> Assertion? Auto generate cucumbers? OR callbacks on node not found?
9
+ https://github.com/ariya/phantomjs/wiki/API-Reference-WebPage
10
+
11
+ Config.defaults {
12
+ on(200,'2xx', :redirect){}
13
+ on('4xx'){}
14
+ on('5xx'){}
15
+ on(:timeout){}
16
+ on(:abort){}
17
+ headers({})
18
+ cookies({})
19
+ agent "Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/534.34 (KHTML, like Gecko) Klepto/#{Klepto::VERSION} Safari/534.34"
20
+ }
21
+ Bot.new("http://google.com") do
22
+ config{
23
+ # merges with Defaults, creates a Configuration
24
+ url "http://google.com"
25
+ auto_structure false # stops it from running structure (@bot.process! will run it)
26
+ abort_on_failure true
27
+ agent "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22";
28
+
29
+ headers({})
30
+ cookies({})
31
+
32
+ on(200,'2xx', :redirect){}
33
+ on('4xx'){}
34
+ on('5xx'){}
35
+ on(:timeout){}
36
+ on(:abort){}
37
+
38
+ before(:get){}
39
+ after(:get){}
40
+ before(:structure){}
41
+ after(:structure){}
42
+ }
43
+
44
+ structure{
45
+ # Should yield against Proxy so method_missing and queueing isn't in Bot
46
+ }
47
+ end
48
+
49
+ */
50
+ var page = require('webpage').create(),
51
+ system = require('system'),
52
+ lt, pt, t, currentAddress, requestedAddress;
53
+
54
+ page.settings.userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22";
55
+ page.settings.loadImages = false;
56
+
57
+ page.onUrlChanged = function(targetUrl){
58
+ currentAddress = targetUrl;
59
+ console.log("Redirecting to: " + currentAddress);
60
+ }
61
+
62
+ page.onResourceReceived = function(resource) {
63
+ if (resource.stage === 'end' && resource.status == 200 && resource.url == currentAddress) {
64
+ lt = Date.now() - t;
65
+ console.log("Crawling: " + resource.url);
66
+ page.includeJs("http://ajax.googleapis.com/ajax/libs/jquery/1.6.1/jquery.min.js", function() {
67
+ var title = page.evaluate(function(){
68
+ return $("title").text();
69
+ });
70
+
71
+ pt = Date.now() - t;
72
+ var structure = JSON.stringify({
73
+ title: title,
74
+ _meta: {
75
+ loadTime: lt,
76
+ parseTime: pt,
77
+ redirectOccurred: (requestedAddress != resource.url),
78
+ requestedAddress: requestedAddress,
79
+ currentAddress: resource.url,
80
+ httpCode: resource.status
81
+ }
82
+ });
83
+ system.stdout.write(structure);
84
+
85
+ phantom.exit();
86
+ });
87
+ } else if(resource.stage === 'end' && resource.status != 200 && resource.url == currentAddress){
88
+ console.log("Oops: " + resource.status);
89
+ phantom.exit();
90
+ } else {/* NOOP*/}
91
+ }
92
+
93
+ if (system.args.length === 1) {
94
+ console.log('Usage: test.js <some URL>');
95
+ phantom.exit(1);
96
+ } else {
97
+ t = Date.now();
98
+ currentAddress = requestedAddress = system.args[1];
99
+ page.open(requestedAddress);
100
+ }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: klepto
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.9
4
+ version: 0.7.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-07-10 00:00:00.000000000 Z
12
+ date: 2013-07-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: poltergeist
@@ -115,6 +115,7 @@ files:
115
115
  - lib/klepto/structure.rb
116
116
  - lib/klepto/tasks.rb
117
117
  - lib/klepto/version.rb
118
+ - phantom/test.js
118
119
  - samples/bieber.html
119
120
  - samples/concept.rb
120
121
  - spec/cassettes/Klepto_Crawler/dsl_interaction/should_crawl_the_resource.yml
@@ -133,7 +134,8 @@ files:
133
134
  - spec/orm/database.example.yml
134
135
  - spec/spec_helper.rb
135
136
  homepage: http://github.com/coryodaniel/klepto
136
- licenses: []
137
+ licenses:
138
+ - MIT
137
139
  post_install_message:
138
140
  rdoc_options: []
139
141
  require_paths: