hawkeye 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ae4acca9dcd54f98da25d08abcfd3029e766d632
4
+ data.tar.gz: 8f5217661a6025f5d4f8215495f691579162e602
5
+ SHA512:
6
+ metadata.gz: 292d6837a6c77d1521b5832d13cee7e2d3edcf6260f12b0fa9abc21b3a2982fe215b5d9631b17b2ff04c3f69929f5fbac1299924f1251f55c09b92f6d744ee47
7
+ data.tar.gz: b5e717e2a474b924e7b3f654434b996dabca6c52aef9aeadcb6275d789c7d92ddcd629a4faa503998ea6569d82c47b3ddc7edb81891b6299326da3265059874e
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,27 @@
1
# Fetch gems over TLS; the plain-http endpoint can be tampered with in transit.
source "https://rubygems.org"

# Runtime dependencies. Jeweler copies these into the generated gemspec
# (see "dependencies defined in Gemfile" in the Rakefile).
gem 'rails'    # https://github.com/rails/rails
gem 'eggs'     #, path: '~/Rails/Gems/eggs' # https://github.com/abcbots/eggs
gem 'nokogiri' # https://github.com/sparklemotion/nokogiri
gem 'sanitize' # https://github.com/rgrove/sanitize

# NOTE: there is deliberately no `gemspec` line here. The gemspec is generated
# from this Gemfile, so pulling the gemspec back in made hawkeye list itself
# as one of its own runtime dependencies.

# Everything needed to run rake, tests, features, etc.
# The :development group is included because the Rakefile and
# features/support/env.rb call Bundler.setup(:default, :development).
group :development, :test do
  gem "activesupport"
  gem "rspec", "~> 2.8.0"
  gem "yard", "~> 0.7"
  gem "rdoc", "~> 3.12"
  gem "cucumber", ">= 0"
  gem "bundler", "~> 1.0"
  gem "jeweler", "~> 1.8.7"
  gem "simplecov", :require => false # https://github.com/colszowka/simplecov
  gem "pry"
end
data/Gemfile.lock ADDED
@@ -0,0 +1,129 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ hawkeye (0.0.7)
5
+ eggs
6
+ hawkeye
7
+ nokogiri
8
+ rails
9
+ sanitize
10
+
11
+ GEM
12
+ remote: http://rubygems.org/
13
+ specs:
14
+ actionmailer (0.6.1)
15
+ actionpack (>= 0.9.5)
16
+ actionpack (1.4.0)
17
+ activerecord (1.6.0)
18
+ activesupport (4.0.1)
19
+ i18n (~> 0.6, >= 0.6.4)
20
+ minitest (~> 4.2)
21
+ multi_json (~> 1.3)
22
+ thread_safe (~> 0.1)
23
+ tzinfo (~> 0.3.37)
24
+ addressable (2.3.5)
25
+ atomic (1.1.14)
26
+ builder (3.2.2)
27
+ coderay (1.0.9)
28
+ cucumber (1.3.8)
29
+ builder (>= 2.1.2)
30
+ diff-lcs (>= 1.1.3)
31
+ gherkin (~> 2.12.1)
32
+ multi_json (>= 1.7.5, < 2.0)
33
+ multi_test (>= 0.0.2)
34
+ diff-lcs (1.1.3)
35
+ eggs (0.0.7)
36
+ activesupport
37
+ faraday (0.8.8)
38
+ multipart-post (~> 1.2.0)
39
+ gherkin (2.12.2)
40
+ multi_json (~> 1.3)
41
+ git (1.2.6)
42
+ github_api (0.10.1)
43
+ addressable
44
+ faraday (~> 0.8.1)
45
+ hashie (>= 1.2)
46
+ multi_json (~> 1.4)
47
+ nokogiri (~> 1.5.2)
48
+ oauth2
49
+ hashie (2.0.5)
50
+ highline (1.6.20)
51
+ httpauth (0.2.0)
52
+ i18n (0.6.5)
53
+ jeweler (1.8.8)
54
+ builder
55
+ bundler (~> 1.0)
56
+ git (>= 1.2.5)
57
+ github_api (= 0.10.1)
58
+ highline (>= 1.6.15)
59
+ nokogiri (= 1.5.10)
60
+ rake
61
+ rdoc
62
+ json (1.8.1)
63
+ jwt (0.1.8)
64
+ multi_json (>= 1.5)
65
+ method_source (0.8.2)
66
+ minitest (4.7.5)
67
+ multi_json (1.8.2)
68
+ multi_test (0.0.2)
69
+ multi_xml (0.5.5)
70
+ multipart-post (1.2.0)
71
+ nokogiri (1.5.10)
72
+ oauth2 (0.9.2)
73
+ faraday (~> 0.8)
74
+ httpauth (~> 0.2)
75
+ jwt (~> 0.1.4)
76
+ multi_json (~> 1.0)
77
+ multi_xml (~> 0.5)
78
+ rack (~> 1.2)
79
+ pry (0.9.12.2)
80
+ coderay (~> 1.0.5)
81
+ method_source (~> 0.8)
82
+ slop (~> 3.4)
83
+ rack (1.5.2)
84
+ rails (0.9.5)
85
+ actionmailer (>= 0.6.1)
86
+ actionpack (>= 1.4.0)
87
+ activerecord (>= 1.6.0)
88
+ rake (>= 0.4.15)
89
+ rake (10.1.0)
90
+ rdoc (3.12.2)
91
+ json (~> 1.4)
92
+ rspec (2.8.0)
93
+ rspec-core (~> 2.8.0)
94
+ rspec-expectations (~> 2.8.0)
95
+ rspec-mocks (~> 2.8.0)
96
+ rspec-core (2.8.0)
97
+ rspec-expectations (2.8.0)
98
+ diff-lcs (~> 1.1.2)
99
+ rspec-mocks (2.8.0)
100
+ sanitize (2.0.6)
101
+ nokogiri (>= 1.4.4)
102
+ simplecov (0.7.1)
103
+ multi_json (~> 1.0)
104
+ simplecov-html (~> 0.7.1)
105
+ simplecov-html (0.7.1)
106
+ slop (3.4.6)
107
+ thread_safe (0.1.3)
108
+ atomic
109
+ tzinfo (0.3.38)
110
+ yard (0.8.7.3)
111
+
112
+ PLATFORMS
113
+ ruby
114
+
115
+ DEPENDENCIES
116
+ activesupport
117
+ bundler (~> 1.0)
118
+ cucumber
119
+ eggs
120
+ hawkeye!
121
+ jeweler (~> 1.8.7)
122
+ nokogiri
123
+ pry
124
+ rails
125
+ rdoc (~> 3.12)
126
+ rspec (~> 2.8.0)
127
+ sanitize
128
+ simplecov
129
+ yard (~> 0.7)
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2013 Dave Makena
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,60 @@
1
+ = Hawkeye
2
+
3
+ Hawkeye scrapes, remembers, compares, and outputs changes in web site content.
4
+ Just call
5
+ Hawkeye.refresh(memory, url, css_selector, item_tag, top_id)
6
+ to refresh Hawkeye's memory.
7
+ If you need to see the latest changes,
8
+ just call
9
+ Hawkeye.show(memory)
10
+ to get the latest changes.
11
+ Happy coding!
12
+
13
+ = Rails 4
14
+
15
+ gem 'hawkeye'
16
+
17
+ bundle
18
+
19
+ gem install hawkeye
20
+
21
+ = Usage
22
+
23
+ == Hawkeye.refresh(memory, url, css_selector, item_tag, top_id)
24
+
25
+ == Hawkeye.show(memory)
26
+
27
+
28
+ == Contributing to hawkeye
29
+
30
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
31
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
32
+ * Fork the project.
33
+ * Start a feature/bugfix branch.
34
+ * Commit and push until you are happy with your contribution.
35
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
36
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
37
+
38
+ == Copyright
39
+
40
+ Copyright (c) 2013 Dave Makena. See LICENSE.txt for
41
+ further details.
42
+ = hawkeye
43
+
44
+ Description goes here.
45
+
46
+ == Contributing to hawkeye
47
+
48
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
49
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
50
+ * Fork the project.
51
+ * Start a feature/bugfix branch.
52
+ * Commit and push until you are happy with your contribution.
53
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
54
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
55
+
56
+ == Copyright
57
+
58
+ Copyright (c) 2013 Dave Makena. See LICENSE.txt for
59
+ further details.
60
+
data/Rakefile ADDED
@@ -0,0 +1,40 @@
1
+ # encoding: utf-8
2
+
3
require 'rubygems'
require 'bundler'
begin
  # The Gemfile declares jeweler, rspec, cucumber and yard in its :test group,
  # so that group has to be set up as well or the requires below cannot be
  # resolved under Bundler.
  Bundler.setup(:default, :development, :test)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

# Gem packaging / release tasks.
require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "hawkeye"
  gem.homepage = "http://github.com/abcbots/hawkeye"
  gem.license = "MIT"
  gem.summary = %Q{Hawkeye Scrapes, Compares, and Reports Change.}
  gem.description = %Q{Hawkeye Scrapes, Compares, and Reports Change via Nokogiri.}
  gem.email = "davemakena@gmail.com"
  gem.authors = ["Dave Makena"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

# `rake spec` runs every spec/**/*_spec.rb file.
require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

# `rake features` runs the Cucumber features.
require 'cucumber/rake/task'
Cucumber::Rake::Task.new(:features)

task :default => :spec

# `rake yard` builds the API documentation.
require 'yard'
YARD::Rake::YardocTask.new
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.7
@@ -0,0 +1,9 @@
1
+ Feature: something something
2
+ In order to something something
3
+ A user something something
4
+ something something something
5
+
6
+ Scenario: something something
7
+ Given inspiration
8
+ When I create a sweet new gem
9
+ Then everyone should see how awesome I am
File without changes
@@ -0,0 +1,13 @@
1
# Cucumber support file: sets up Bundler, puts lib/ on the load path and loads
# the library plus the RSpec expectation matchers.
require 'bundler'
begin
  # rspec lives in the Gemfile's :test group, so that group must be set up too.
  Bundler.setup(:default, :development, :test)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end

$LOAD_PATH.unshift(File.dirname(__FILE__) + '/../../lib')
require 'hawkeye'

require 'rspec/expectations'
data/hawkeye.gemspec ADDED
@@ -0,0 +1,67 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+ # stub: hawkeye 0.0.7 ruby lib
6
+
7
Gem::Specification.new do |s|
  s.name = "hawkeye"
  s.version = "0.0.7"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Dave Makena"]
  s.date = "2013-11-06"
  s.description = "Hawkeye Scrapes, Compares, and Reports Change via Nokogiri."
  s.email = "davemakena@gmail.com"
  s.extra_rdoc_files = [
    "LICENSE.txt",
    "README.rdoc"
  ]
  s.files = [
    ".document",
    ".rspec",
    "Gemfile",
    "Gemfile.lock",
    "LICENSE.txt",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "features/hawkeye.feature",
    "features/step_definitions/hawkeye_steps.rb",
    "features/support/env.rb",
    "hawkeye.gemspec",
    "lib/hawkeye.rb",
    "spec/hawkeye_spec.rb",
    "spec/spec_helper.rb"
  ]
  s.homepage = "http://github.com/abcbots/hawkeye"
  s.licenses = ["MIT"]
  s.require_paths = ["lib"]
  s.rubygems_version = "2.1.10"
  s.summary = "Hawkeye Scrapes, Compares, and Reports Change."

  s.specification_version = 4 if s.respond_to? :specification_version

  # Runtime dependencies. The gem must not list itself here: a "hawkeye"
  # entry would make the gem depend on its own name. add_dependency declares
  # a runtime dependency, so the three RubyGems-version-specific branches
  # (which all added the same gems) are collapsed into one loop.
  %w[rails eggs nokogiri sanitize].each do |dependency|
    s.add_dependency(dependency, [">= 0"])
  end
end
67
+
data/lib/hawkeye.rb ADDED
@@ -0,0 +1,242 @@
1
+ module Hawkeye
2
+
3
+ require 'eggs'
4
+ require 'hawkeye'
5
+ require 'pry'
6
+ require 'nokogiri'
7
+ require 'sanitize'
8
+
9
+ # == Hawkeye.show(memory)
10
+ # memory =
11
+ # url = http://test.html
12
+ # css_selector = a
13
+ # item_tag = ul
14
+ # top_id = top
15
+ #
16
+ # Hawkeye.show(memory) #=>
17
+ # memory = Hawkeye.refresh(memory, url, css_selector, item_tag, top_id) #=> <ul><a target="_blank" href="http://www.abcbots.com/test">Static</a></ul><center><a href='#top'>###</a></center><hr /><ul><a target="_blank" href="http://www.abcbots.com/test">Dynamic: z7O6sDfl7u</a></ul><center><a href='#top'>###</a></center><hr /><!-- split_tag --><!-- 2013-11-05 18:43:24 -0800 -->
18
+ #
19
+ # Hawkeye.show(memory) #=> <ul><a target="_blank" href="http://www.abcbots.com/test">Static</a></ul><center><a href='#top'>###</a></center><hr /><ul><a target="_blank" href="http://www.abcbots.com/test">Dynamic: z7O6sDfl7u</a></ul><center><a href='#top'>###</a></center><hr /><!-- split_tag --><!-- 2013-11-05 18:43:24 -0800 -->
20
+ # memory = Hawkeye.refresh(memory, url, css_selector, item_tag, top_id) #=> <ul><a target="_blank" href="http://www.abcbots.com/test">Dynamic: lH0eMdGrp5</a></ul><center><a href='#top'>###</a></center><hr /><!-- split_tag --><!-- 2013-11-05 18:43:24 -0800 --><a target="_blank" href="http://www.abcbots.com/test">Static</a>
21
+ #
22
+ # Hawkeye.show(memory) #=> <ul><a target="_blank" href="http://www.abcbots.com/test">Dynamic: lH0eMdGrp5</a></ul><center><a href='#top'>###</a></center><hr /><!-- split_tag --><!-- 2013-11-05 18:43:24 -0800 --><a target="_blank" href="http://www.abcbots.com/test">Static</a>
23
+ #
24
+ def self.show(memory)
25
+ x = set_defaults(memory)
26
+ return show_latest(x)
27
+ end
28
+
29
+ # == Hawkeye.refresh(memory, url, css_selector, item_tag, top_id)
30
+ # memory =
31
+ # url = http://test.html
32
+ # css_selector = a
33
+ # item_tag = ul
34
+ # top_id = top
35
+ # change = Hawkeye.refresh(memory, url, css_selector, item_tag, top_id) #=> <ul><a target="_blank" href="http://www.abcbots.com/test">Static</a></ul><center><a href='#top'>###</a></center><hr /><ul><a target="_blank" href="http://www.abcbots.com/test">Dynamic: 8dsmIVKgMF</a></ul><center><a href='#top'>###</a></center><hr /><!-- split_tag --><!-- 2013-11-05 18:43:24 -0800 -->
36
+ #
37
+ def self.refresh(memory, url, css_selector, item_tag, top_id)
38
+ x = set_defaults(memory, url, css_selector, item_tag, top_id)
39
+ if x[:refresh_ready]
40
+ x = add_new(x)
41
+ x = add_old(x)
42
+ x = add_diff(x)
43
+ x = prepend_diff(x)
44
+ end
45
+ return x[:memory]
46
+ end
47
+
48
+ def self.show_latest(x)
49
+ return x[:memory].to_s.split(x[:split_tag]).first
50
+ end
51
+
52
+ def self.set_defaults(memory, url=nil, css_selector=nil, item_tag="p", top_id="_top_")
53
+ x={}
54
+ x[:memory] = memory
55
+ x[:url] = url
56
+ x[:css_selector] = css_selector
57
+ x[:item_tag] = item_tag
58
+ x[:top_id] = top_id
59
+ x[:refresh_ready] = ((url.to_s!="") and (css_selector.to_s!="") and (item_tag.to_s!="") and (top_id.to_s!=""))
60
+ x[:base_url] = x[:url].to_s.split("/")[0..2].join("/") + "/"
61
+ x[:split_tag] = "<!-- split_tag --><!-- #{Time.now.to_s} -->"
62
+ return x
63
+ end
64
+
65
+ def self.prepend_diff(x)
66
+ if (x[:diff].to_s!="")
67
+ x[:memory] = ((x[:diff]) + (x[:split_tag]) + (x[:diff_old]) )
68
+ else
69
+ x[:memory] = ((x[:split_tag]) + (x[:diff_old]) )
70
+ end
71
+ return x
72
+ end
73
+
74
+ def self.add_diff(x)
75
+
76
+ old_a, old_hsh = get_diff_a_and_hsh(x[:css_selector], x[:old_docs])
77
+ new_a, new_hsh = get_diff_a_and_hsh(x[:css_selector], x[:new_docs])
78
+ diff_a = (new_a - old_a)
79
+
80
+ diff_s = ""
81
+ for diff in diff_a
82
+ if (Sanitize.clean( ( (new_hsh[diff]) ) ).to_s!="")
83
+ diff_s << "<#{x[:item_tag]}>#{new_hsh[diff]}</#{x[:item_tag]}>"
84
+ diff_s << "<center><a href='##{x[:top_id]}'>###</a></center>"
85
+ diff_s << "<hr />"
86
+ end
87
+ end
88
+ x[:diff] = diff_s
89
+
90
+ pre_diff_s = ""
91
+ diff_s = ""
92
+ not_full = true
93
+ while ((old_a.to_a.size.to_i!=0) and not_full)
94
+ diff = old_a.shift
95
+ pre_diff_s = new_hsh[diff].to_s
96
+ if (diff_s + pre_diff_s).size.to_i>7000
97
+ not_full = false
98
+ else
99
+ not_full = true
100
+ diff_s << pre_diff_s
101
+ end
102
+ end
103
+ x[:diff_old] = diff_s
104
+
105
+ return x
106
+ end
107
+
108
+ def self.get_diff_a_and_hsh(css_selector, docs)
109
+ doc_a = []
110
+ doc_hsh = {}
111
+ css_selector = css_selector.split(" ").last
112
+ for doc in docs.css(css_selector)
113
+ d = 1
114
+ doc_s = doc.to_s
115
+ a = doc_s.gsub(/[^a-zA-Z]/i,'').to_s.downcase
116
+ aa = a.split('').uniq
117
+ for b in aa
118
+ d += b.ord
119
+ d *= a.scan(b).size
120
+ end
121
+ doc_id = d.to_s
122
+ unless doc_a.include?(doc_id)
123
+ doc_a << doc_id
124
+ doc_hsh[doc_id] = doc_s
125
+ end
126
+ end
127
+ return doc_a, doc_hsh
128
+ end
129
+
130
+ def self.add_old(x)
131
+ x = add_old_content(x)
132
+ x = add_old_docs(x)
133
+ return x
134
+ end
135
+
136
+ def self.add_new(x)
137
+ x = add_new_content(x)
138
+ x = fix_new_content_base_url(x)
139
+ x = fix_new_content_base_url_target(x)
140
+ x = add_new_docs(x)
141
+ return x
142
+ end
143
+
144
+ def self.fix_new_content_base_url_target(x)
145
+ content = x[:new_content]
146
+ content = content.to_s.gsub("href=", "target='_blank' href=")
147
+ x[:new_content] = content
148
+ return x
149
+ end
150
+
151
+ def self.fix_new_content_base_url(x)
152
+ base_url = x[:base_url]
153
+ content = x[:new_content]
154
+ content = get_content_mix_base_url_selector(base_url, content, "href=\"")
155
+ content = get_content_mix_base_url_selector(base_url, content, "href=\'")
156
+ content = get_content_mix_base_url_selector(base_url, content, "src=\"")
157
+ content = get_content_mix_base_url_selector(base_url, content, "src=\'")
158
+ env_base_url = (base_url+"/")
159
+ content = content.gsub(env_base_url, base_url)
160
+ double_base_url = base_url + base_url
161
+ content = content.gsub(double_base_url, base_url)
162
+ mutant_base_url = (base_url+"http")
163
+ content = content.gsub(mutant_base_url, "http")
164
+ x[:new_content] = content
165
+ return x
166
+ end
167
+
168
+ def self.get_content_mix_base_url_selector(base_url, content, selector)
169
+ selector_with_base_url = selector + base_url
170
+ content = content.to_s.gsub(selector, selector_with_base_url)
171
+ return content.to_s
172
+ end
173
+
174
+
175
+ def self.add_new_docs(x)
176
+ x[:new_docs] = Nokogiri::HTML(x[:new_content])
177
+ return x
178
+ end
179
+
180
+ def self.add_old_docs(x)
181
+ x[:old_docs] = Nokogiri::HTML(x[:old_content])
182
+ return x
183
+ end
184
+
185
+ def self.mix_base_url_to_doc(element, doc, base_url)
186
+
187
+ doc_element = doc[element]
188
+ if (doc_element.to_s!="") and !get_first_match(doc_element, element)
189
+ doc_element_a = doc_element.split("")
190
+ if (doc_element_a.first == "/")
191
+ doc_element_a.shift
192
+ doc_element = doc_element_a.join
193
+ end
194
+ doc[element] = base_url + doc_element
195
+ doc["target"] = "_blank" if element=="href"
196
+ end
197
+ return doc
198
+ end
199
+
200
+ def self.add_new_content(x)
201
+ begin
202
+ if (x[:url].to_s!="")
203
+ if x[:url]=="http://test.html"
204
+ x[:new_content] = "
205
+ <html><body>
206
+ <p><a href='http://www.abcbots.com/test'>Static</a></p>
207
+ <p><a href='http://www.abcbots.com/test'>Dynamic: #{pass=Eggs.key(10)}</a></p>
208
+ <p><a href='http://www.abcbots.com/test'>Dynamic: #{pass}</a></p>
209
+ </body></html>"
210
+ else
211
+ x[:new_content] = open(source_url.to_s).read.to_s.gsub("@", " @")
212
+ end
213
+ else
214
+ x[:new_content] = ""
215
+ end
216
+ rescue
217
+ x[:new_content] = "(Access Denied)"
218
+ end
219
+ return x
220
+ end
221
+
222
+ def self.add_old_content(x)
223
+ x[:old_content] = x[:memory].to_s
224
+ return x
225
+ end
226
+
227
+ # Hola.hi #=> "Hello World!"
228
+ def self.hi
229
+ puts "Hello World!"
230
+ return "Hello World!"
231
+ end
232
+
233
+ # Hola.get_url("http://www.google.com") #=> "<html>...</html>"
234
+ def self.get_url(a="")
235
+ content = open(a.to_s).read.to_s.gsub("@", " @_")
236
+ docs = Nokogiri::HTML(content)
237
+ docs = docs.to_s.gsub(" @_", "@").to_s
238
+ puts a+": "+docs
239
+ return docs.to_s
240
+ end
241
+
242
+ end
@@ -0,0 +1,47 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ require 'hawkeye'
4
+ require 'pry'
5
+ require 'nokogiri'
6
+
7
# Exercises Hawkeye against the stub page that Hawkeye.add_new_content serves
# for "http://test.html": one "Static" link plus "Dynamic: <random key>"
# links whose key changes on every fetch.
describe "Hawkeye" do

  let(:url)          { "http://test.html" }
  let(:css_selector) { 'a' }
  let(:item_tag)     { 'ul' }
  let(:top_id)       { 'top' }
  # Stable prefix of the separator between the latest diff and older content.
  let(:split_marker) { "<!-- split_tag -->" }

  it "refreshes" do
    # First refresh: nothing is remembered yet, so every link is new.
    first = Hawkeye.refresh('', url, css_selector, item_tag, top_id)
    first.should include(split_marker)
    first.split(split_marker).first.should include("Static")

    # Second refresh: the static link is already known, so it moves behind
    # the split tag and is no longer reported as a change.
    second = Hawkeye.refresh(first, url, css_selector, item_tag, top_id)
    latest, remembered = second.split(split_marker)
    latest.to_s.should_not include("Static")
    remembered.to_s.should include("Static")
  end

  it "leaves the memory untouched when the url is blank" do
    Hawkeye.refresh('remembered', '', css_selector, item_tag, top_id).should == 'remembered'
  end

  it "shows" do
    # Nothing remembered yet: nothing to show.
    Hawkeye.show('').to_s.should == ""

    memory = Hawkeye.refresh('', url, css_selector, item_tag, top_id)
    Hawkeye.show(memory).should include("Static")
  end

end
@@ -0,0 +1,12 @@
1
# Shared RSpec bootstrap: puts lib/ and spec/ on the load path, loads RSpec
# and the library, then pulls in every helper under spec/support.
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'))
$LOAD_PATH.unshift(spec_dir)
require 'rspec'
require 'hawkeye'

# Requires supporting files with custom matchers and macros, etc,
# in ./support/ and its subdirectories.
Dir["#{spec_dir}/support/**/*.rb"].each do |support_file|
  require support_file
end

RSpec.configure do |config|

end
metadata ADDED
@@ -0,0 +1,130 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hawkeye
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.7
5
+ platform: ruby
6
+ authors:
7
+ - Dave Makena
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-11-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rails
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: eggs
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: sanitize
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: hawkeye
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: Hawkeye Scrapes, Compares, and Reports Change via Nokogiri.
84
+ email: davemakena@gmail.com
85
+ executables: []
86
+ extensions: []
87
+ extra_rdoc_files:
88
+ - LICENSE.txt
89
+ - README.rdoc
90
+ files:
91
+ - .document
92
+ - .rspec
93
+ - Gemfile
94
+ - Gemfile.lock
95
+ - LICENSE.txt
96
+ - README.rdoc
97
+ - Rakefile
98
+ - VERSION
99
+ - features/hawkeye.feature
100
+ - features/step_definitions/hawkeye_steps.rb
101
+ - features/support/env.rb
102
+ - hawkeye.gemspec
103
+ - lib/hawkeye.rb
104
+ - spec/hawkeye_spec.rb
105
+ - spec/spec_helper.rb
106
+ homepage: http://github.com/abcbots/hawkeye
107
+ licenses:
108
+ - MIT
109
+ metadata: {}
110
+ post_install_message:
111
+ rdoc_options: []
112
+ require_paths:
113
+ - lib
114
+ required_ruby_version: !ruby/object:Gem::Requirement
115
+ requirements:
116
+ - - '>='
117
+ - !ruby/object:Gem::Version
118
+ version: '0'
119
+ required_rubygems_version: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - '>='
122
+ - !ruby/object:Gem::Version
123
+ version: '0'
124
+ requirements: []
125
+ rubyforge_project:
126
+ rubygems_version: 2.1.10
127
+ signing_key:
128
+ specification_version: 4
129
+ summary: Hawkeye Scrapes, Compares, and Reports Change.
130
+ test_files: []