sitediff 0.0.6 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +5 -5
  2. data/.eslintignore +1 -0
  3. data/.eslintrc.json +28 -0
  4. data/.project +11 -0
  5. data/.rubocop.yml +179 -0
  6. data/.rubocop_todo.yml +51 -0
  7. data/CHANGELOG.md +28 -0
  8. data/Dockerfile +33 -0
  9. data/Gemfile +11 -0
  10. data/Gemfile.lock +85 -0
  11. data/INSTALLATION.md +146 -0
  12. data/LICENSE +339 -0
  13. data/README.md +810 -0
  14. data/Rakefile +12 -0
  15. data/Thorfile +135 -0
  16. data/bin/sitediff +9 -2
  17. data/config/.gitkeep +0 -0
  18. data/config/sanitize_domains.example.yaml +8 -0
  19. data/config/sitediff.example.yaml +81 -0
  20. data/docker-compose.test.yml +3 -0
  21. data/lib/sitediff/api.rb +276 -0
  22. data/lib/sitediff/cache.rb +57 -8
  23. data/lib/sitediff/cli.rb +156 -176
  24. data/lib/sitediff/config/creator.rb +61 -77
  25. data/lib/sitediff/config/preset.rb +75 -0
  26. data/lib/sitediff/config.rb +436 -31
  27. data/lib/sitediff/crawler.rb +27 -21
  28. data/lib/sitediff/diff.rb +32 -9
  29. data/lib/sitediff/fetch.rb +10 -3
  30. data/lib/sitediff/files/diff.html.erb +20 -2
  31. data/lib/sitediff/files/jquery.min.js +2 -0
  32. data/lib/sitediff/files/normalize.css +349 -0
  33. data/lib/sitediff/files/report.html.erb +171 -0
  34. data/lib/sitediff/files/sidebyside.html.erb +5 -2
  35. data/lib/sitediff/files/sitediff.css +303 -30
  36. data/lib/sitediff/files/sitediff.js +367 -0
  37. data/lib/sitediff/presets/drupal.yaml +63 -0
  38. data/lib/sitediff/report.rb +254 -0
  39. data/lib/sitediff/result.rb +50 -20
  40. data/lib/sitediff/sanitize/dom_transform.rb +47 -8
  41. data/lib/sitediff/sanitize/regexp.rb +24 -3
  42. data/lib/sitediff/sanitize.rb +81 -12
  43. data/lib/sitediff/uriwrapper.rb +65 -23
  44. data/lib/sitediff/webserver/resultserver.rb +30 -33
  45. data/lib/sitediff/webserver.rb +15 -3
  46. data/lib/sitediff.rb +130 -83
  47. data/misc/sitediff - overview report.png +0 -0
  48. data/misc/sitediff - page report.png +0 -0
  49. data/package-lock.json +878 -0
  50. data/package.json +25 -0
  51. data/sitediff.gemspec +51 -0
  52. metadata +91 -29
  53. data/lib/sitediff/files/html_report.html.erb +0 -66
  54. data/lib/sitediff/files/rules/drupal.yaml +0 -63
  55. data/lib/sitediff/rules.rb +0 -65
data/package.json ADDED
@@ -0,0 +1,25 @@
1
+ {
2
+ "name": "sitediff",
3
+ "version": "1.0.0",
4
+ "description": "Sitediff Javascript",
5
+ "main": "./sitediff/files/sitediff.js",
6
+ "directories": {
7
+ "lib": "lib"
8
+ },
9
+ "scripts": {
10
+ "lint": "eslint --debug ./lib/sitediff/files/*.js"
11
+ },
12
+ "repository": {
13
+ "type": "git",
14
+ "url": "git+https://github.com/evolvingweb/sitediff.git"
15
+ },
16
+ "author": "",
17
+ "license": "GPL-2.0-only",
18
+ "bugs": {
19
+ "url": "https://github.com/evolvingweb/sitediff/issues"
20
+ },
21
+ "homepage": "https://github.com/evolvingweb/sitediff#readme",
22
+ "devDependencies": {
23
+ "eslint": "^7.5.0"
24
+ }
25
+ }
data/sitediff.gemspec ADDED
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = 'sitediff'
5
+ s.version = '1.2.0'
6
+ s.required_ruby_version = '>= 3.1.2'
7
+ s.summary = 'Compare two versions of a site with ease!'
8
+ s.description = <<DESC
9
+ SiteDiff makes it easy to see differences between two versions of a website. It accepts a set of paths to compare two versions of the site together with potential normalization/sanitization rules. From the provided paths and configuration SiteDiff generates an HTML report of all the status of HTML comparison between the given paths together with a readable diff-like HTML for each specified path containing the differences between the two versions of the site. It is useful tool for QAing re-deployments, site upgrades, etc.
10
+ DESC
11
+ s.license = 'GPL-2.0'
12
+ s.authors = ['Alex Dergachev', 'Amir Kadivar', 'Dave Vasilevsky']
13
+ s.homepage = 'https://sitediff.io/'
14
+ s.email = 'alex@evolvingweb.ca'
15
+ s.metadata = {
16
+ 'source_code_uri' => 'https://github.com/evolvingweb/sitediff'
17
+ }
18
+
19
+ # Specify which files should be added to the gem when it is released.
20
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
21
+ s.files = Dir.chdir(File.expand_path(__dir__)) do
22
+ `git ls-files -z`.split("\x0").reject do |f|
23
+ (f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|travis|circleci)|appveyor)})
24
+ end
25
+ end
26
+ s.require_paths = ['lib']
27
+ # s.files = Dir.glob('lib/**/*.rb') +
28
+ # Dir.glob('lib/sitediff/files/*') +
29
+ # Dir.glob('lib/sitediff/files/rules/*.yaml')
30
+ s.bindir = 'bin'
31
+ s.executables = 'sitediff'
32
+
33
+ # Apparently we require pkg-config
34
+ s.add_dependency 'pkg-config', '~> 1.4'
35
+
36
+ s.add_dependency 'minitar', '~> 0.9'
37
+ s.add_dependency 'thor', '~> 1.2.1'
38
+ s.add_dependency 'typhoeus', '~> 1.4.0'
39
+
40
+ # A bug in rubygems can break rainbow 2.2
41
+ # https://github.com/bundler/bundler/issues/5357
42
+ s.add_dependency 'rainbow', '~> 3.1.1'
43
+
44
+ # Nokogiri 1.7 is not supported on Ruby 2.0.
45
+ s.add_dependency 'nokogiri', '>= 1.13.6'
46
+
47
+ # Diffy and addressable have a max version for Ruby 1.9.
48
+ s.add_dependency 'addressable', '>= 2.5.2', '< 2.9.0'
49
+ s.add_dependency 'diffy', '~> 3.4.0'
50
+ s.add_dependency 'webrick', '>= 1.7'
51
+ end
metadata CHANGED
@@ -1,16 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitediff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex Dergachev
8
8
  - Amir Kadivar
9
9
  - Dave Vasilevsky
10
- autorequire:
10
+ autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2019-04-02 00:00:00.000000000 Z
13
+ date: 2022-08-29 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: pkg-config
@@ -18,98 +18,132 @@ dependencies:
18
18
  requirements:
19
19
  - - "~>"
20
20
  - !ruby/object:Gem::Version
21
- version: '1.1'
21
+ version: '1.4'
22
22
  type: :runtime
23
23
  prerelease: false
24
24
  version_requirements: !ruby/object:Gem::Requirement
25
25
  requirements:
26
26
  - - "~>"
27
27
  - !ruby/object:Gem::Version
28
- version: '1.1'
28
+ version: '1.4'
29
+ - !ruby/object:Gem::Dependency
30
+ name: minitar
31
+ requirement: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - "~>"
34
+ - !ruby/object:Gem::Version
35
+ version: '0.9'
36
+ type: :runtime
37
+ prerelease: false
38
+ version_requirements: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - "~>"
41
+ - !ruby/object:Gem::Version
42
+ version: '0.9'
29
43
  - !ruby/object:Gem::Dependency
30
44
  name: thor
31
45
  requirement: !ruby/object:Gem::Requirement
32
46
  requirements:
33
47
  - - "~>"
34
48
  - !ruby/object:Gem::Version
35
- version: 0.20.0
49
+ version: 1.2.1
36
50
  type: :runtime
37
51
  prerelease: false
38
52
  version_requirements: !ruby/object:Gem::Requirement
39
53
  requirements:
40
54
  - - "~>"
41
55
  - !ruby/object:Gem::Version
42
- version: 0.20.0
56
+ version: 1.2.1
43
57
  - !ruby/object:Gem::Dependency
44
58
  name: typhoeus
45
59
  requirement: !ruby/object:Gem::Requirement
46
60
  requirements:
47
61
  - - "~>"
48
62
  - !ruby/object:Gem::Version
49
- version: '1.0'
63
+ version: 1.4.0
50
64
  type: :runtime
51
65
  prerelease: false
52
66
  version_requirements: !ruby/object:Gem::Requirement
53
67
  requirements:
54
68
  - - "~>"
55
69
  - !ruby/object:Gem::Version
56
- version: '1.0'
70
+ version: 1.4.0
57
71
  - !ruby/object:Gem::Dependency
58
72
  name: rainbow
59
73
  requirement: !ruby/object:Gem::Requirement
60
74
  requirements:
61
75
  - - "~>"
62
76
  - !ruby/object:Gem::Version
63
- version: 3.0.0
77
+ version: 3.1.1
64
78
  type: :runtime
65
79
  prerelease: false
66
80
  version_requirements: !ruby/object:Gem::Requirement
67
81
  requirements:
68
82
  - - "~>"
69
83
  - !ruby/object:Gem::Version
70
- version: 3.0.0
84
+ version: 3.1.1
71
85
  - !ruby/object:Gem::Dependency
72
86
  name: nokogiri
73
87
  requirement: !ruby/object:Gem::Requirement
74
88
  requirements:
75
- - - "~>"
89
+ - - ">="
76
90
  - !ruby/object:Gem::Version
77
- version: 1.8.2
91
+ version: 1.13.6
78
92
  type: :runtime
79
93
  prerelease: false
80
94
  version_requirements: !ruby/object:Gem::Requirement
81
95
  requirements:
82
- - - "~>"
96
+ - - ">="
83
97
  - !ruby/object:Gem::Version
84
- version: 1.8.2
98
+ version: 1.13.6
85
99
  - !ruby/object:Gem::Dependency
86
100
  name: addressable
87
101
  requirement: !ruby/object:Gem::Requirement
88
102
  requirements:
89
- - - "~>"
103
+ - - ">="
90
104
  - !ruby/object:Gem::Version
91
105
  version: 2.5.2
106
+ - - "<"
107
+ - !ruby/object:Gem::Version
108
+ version: 2.9.0
92
109
  type: :runtime
93
110
  prerelease: false
94
111
  version_requirements: !ruby/object:Gem::Requirement
95
112
  requirements:
96
- - - "~>"
113
+ - - ">="
97
114
  - !ruby/object:Gem::Version
98
115
  version: 2.5.2
116
+ - - "<"
117
+ - !ruby/object:Gem::Version
118
+ version: 2.9.0
99
119
  - !ruby/object:Gem::Dependency
100
120
  name: diffy
101
121
  requirement: !ruby/object:Gem::Requirement
102
122
  requirements:
103
123
  - - "~>"
104
124
  - !ruby/object:Gem::Version
105
- version: 3.2.0
125
+ version: 3.4.0
106
126
  type: :runtime
107
127
  prerelease: false
108
128
  version_requirements: !ruby/object:Gem::Requirement
109
129
  requirements:
110
130
  - - "~>"
111
131
  - !ruby/object:Gem::Version
112
- version: 3.2.0
132
+ version: 3.4.0
133
+ - !ruby/object:Gem::Dependency
134
+ name: webrick
135
+ requirement: !ruby/object:Gem::Requirement
136
+ requirements:
137
+ - - ">="
138
+ - !ruby/object:Gem::Version
139
+ version: '1.7'
140
+ type: :runtime
141
+ prerelease: false
142
+ version_requirements: !ruby/object:Gem::Requirement
143
+ requirements:
144
+ - - ">="
145
+ - !ruby/object:Gem::Version
146
+ version: '1.7'
113
147
  description: " SiteDiff makes it easy to see differences between two versions of
114
148
  a website. It accepts a set of paths to compare two versions of the site together
115
149
  with potential normalization/sanitization rules. From the provided paths and configuration
@@ -123,35 +157,64 @@ executables:
123
157
  extensions: []
124
158
  extra_rdoc_files: []
125
159
  files:
160
+ - ".eslintignore"
161
+ - ".eslintrc.json"
162
+ - ".project"
163
+ - ".rubocop.yml"
164
+ - ".rubocop_todo.yml"
165
+ - CHANGELOG.md
166
+ - Dockerfile
167
+ - Gemfile
168
+ - Gemfile.lock
169
+ - INSTALLATION.md
170
+ - LICENSE
171
+ - README.md
172
+ - Rakefile
173
+ - Thorfile
126
174
  - bin/sitediff
175
+ - config/.gitkeep
176
+ - config/sanitize_domains.example.yaml
177
+ - config/sitediff.example.yaml
178
+ - docker-compose.test.yml
127
179
  - lib/sitediff.rb
180
+ - lib/sitediff/api.rb
128
181
  - lib/sitediff/cache.rb
129
182
  - lib/sitediff/cli.rb
130
183
  - lib/sitediff/config.rb
131
184
  - lib/sitediff/config/creator.rb
185
+ - lib/sitediff/config/preset.rb
132
186
  - lib/sitediff/crawler.rb
133
187
  - lib/sitediff/diff.rb
134
188
  - lib/sitediff/exception.rb
135
189
  - lib/sitediff/fetch.rb
136
190
  - lib/sitediff/files/diff.html.erb
137
- - lib/sitediff/files/html_report.html.erb
191
+ - lib/sitediff/files/jquery.min.js
192
+ - lib/sitediff/files/normalize.css
138
193
  - lib/sitediff/files/pretty_print.xsl
139
- - lib/sitediff/files/rules/drupal.yaml
194
+ - lib/sitediff/files/report.html.erb
140
195
  - lib/sitediff/files/sidebyside.html.erb
141
196
  - lib/sitediff/files/sitediff.css
197
+ - lib/sitediff/files/sitediff.js
198
+ - lib/sitediff/presets/drupal.yaml
199
+ - lib/sitediff/report.rb
142
200
  - lib/sitediff/result.rb
143
- - lib/sitediff/rules.rb
144
201
  - lib/sitediff/sanitize.rb
145
202
  - lib/sitediff/sanitize/dom_transform.rb
146
203
  - lib/sitediff/sanitize/regexp.rb
147
204
  - lib/sitediff/uriwrapper.rb
148
205
  - lib/sitediff/webserver.rb
149
206
  - lib/sitediff/webserver/resultserver.rb
150
- homepage: https://github.com/evolvingweb/sitediff/
207
+ - misc/sitediff - overview report.png
208
+ - misc/sitediff - page report.png
209
+ - package-lock.json
210
+ - package.json
211
+ - sitediff.gemspec
212
+ homepage: https://sitediff.io/
151
213
  licenses:
152
214
  - GPL-2.0
153
- metadata: {}
154
- post_install_message:
215
+ metadata:
216
+ source_code_uri: https://github.com/evolvingweb/sitediff
217
+ post_install_message:
155
218
  rdoc_options: []
156
219
  require_paths:
157
220
  - lib
@@ -159,16 +222,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
159
222
  requirements:
160
223
  - - ">="
161
224
  - !ruby/object:Gem::Version
162
- version: '2.3'
225
+ version: 3.1.2
163
226
  required_rubygems_version: !ruby/object:Gem::Requirement
164
227
  requirements:
165
228
  - - ">="
166
229
  - !ruby/object:Gem::Version
167
230
  version: '0'
168
231
  requirements: []
169
- rubyforge_project:
170
- rubygems_version: 2.5.2.3
171
- signing_key:
232
+ rubygems_version: 3.3.7
233
+ signing_key:
172
234
  specification_version: 4
173
235
  summary: Compare two versions of a site with ease!
174
236
  test_files: []
@@ -1,66 +0,0 @@
1
- <!DOCTYPE html>
2
- <html>
3
- <head>
4
- <!-- important: otherwise chrome will choke on non-ascii characters -->
5
- <meta charset="utf-8" />
6
- <style>
7
- <%= SiteDiff::Diff.css %>
8
- </style>
9
- <title> SiteDiff Report </title>
10
- </head>
11
- <body>
12
- <div class="sitediff">
13
- <div class="legend">
14
- <%
15
- tags = %w[before after]
16
- tags.each do |tag| %>
17
- <% if tags.first != tag %> | <% end %>
18
- <% notes = ['base url']
19
- notes << 'cached' if cache.read_tags.include?(tag.to_sym) %>
20
- <strong><%= tag %></strong> (<%= notes.join(', ') %>):
21
- <a href="<%= eval(tag) %>"><%= eval(tag) %></a>
22
- <% end %>
23
- </div>
24
- <div class="run">
25
- <a href="../run/diff">Rerun diff</a>
26
- </div>
27
- <table class="results">
28
-
29
- <colgroup>
30
- <col class="before-col">
31
- <col class="after-col">
32
- <col class="both-col">
33
- <col class="path-col">
34
- <col class="diff-stat-col">
35
- </colgroup>
36
-
37
- <thead>
38
- <tr>
39
- <th> Before </th>
40
- <th> After </th>
41
- <th> Both </th>
42
- <th> Path </th>
43
- <th> Status </th>
44
- </tr>
45
- </thead>
46
-
47
- <% results.each do |result| %>
48
- <tr class="<%= result.status_text %>">
49
- <td class="before">
50
- <a href="<%= result.url(:before, before, cache) %>">[before]</a>
51
- </td>
52
- <td class="after">
53
- <a href="<%= result.url(:after, after, cache) %>">[after]</a>
54
- </td>
55
- <td class="both">
56
- <a href="/sidebyside<%= result.path %>">[both]</a>
57
- </td>
58
- <td class="path"><%= result.path %></td>
59
- <td class="status"><%= result.link %></td>
60
- </tr>
61
- <% end %>
62
-
63
- </table>
64
- </div>
65
- </body>
66
- </html>
@@ -1,63 +0,0 @@
1
- sanitization:
2
- - title: Strip Drupal.settings
3
- selector: script
4
- pattern: '^(<script>)?jQuery.extend\(Drupal.settings.*$'
5
- - title: Strip IE CSS/JS cache IDs
6
- pattern: '("[^"]*ie\d?\.(js|css))\?[a-z0-9]{6}"'
7
- substitute: '\1'
8
- - title: Strip form build ID
9
- selector: input
10
- pattern: 'name="form_build_id" value="form-[-\w]{40,43}"'
11
- substitute: 'name="form_build_id" value="form-DRUPAL_FORM_BUILD_ID"'
12
- - title: Strip view DOM ID
13
- pattern: '(class="view .*) view-dom-id-[a-f0-9]{32}"'
14
- substitute: '\1 view-dom-id-DRUPAL_VIEW_DOM_ID"'
15
- - title: Strip CSS aggregation filenames
16
- selector: link[rel=stylesheet]
17
- pattern: '(href="[^"]*/files/css/css_)[-\w]{40,43}\.css"'
18
- substitute: '\1DRUPAL_AGGREGATED_CSS.css"'
19
- - title: Strip JS aggregation filenames
20
- selector: script
21
- pattern: '(src="[^"]*/files/js/js_)[-\w]{40,43}\.js"'
22
- substitute: '\1DRUPAL_AGGREGATED_JS.js"'
23
- - title: Strip CSS/JS cache IDs
24
- selector: style, script
25
- pattern: '("[^"]*\.(js|css))\?[a-z0-9]{6}"'
26
- substitute: '\1'
27
- - title: Strip Drupal JS version tags
28
- selector: script
29
- pattern: '(src="[^"]*/misc/\w+\.js)?v=\d+\.\d+"'
30
- substitute: '\1'
31
- - title: Strip domain names from absolute URLs
32
- pattern: 'http:\/\/[a-zA-Z0-9.:-]+'
33
- substitute: '__domain__'
34
- - title: Strip form build ID
35
- selector: input
36
- pattern: 'autocomplete="off" data-drupal-selector="form-[-\w]{40,43}"'
37
- substitute: 'autocomplete="off" data-drupal-selector="form-DRUPAL_FORM_BUILD_ID"'
38
- - title: Strip form build ID 2
39
- selector: input
40
- pattern: 'name="form_build_id" value="form-[-\w]{40,43}"'
41
- substitute: 'name="form_build_id" value="form-DRUPAL_FORM_BUILD_ID"'
42
- - title: Strip Drupal CSS link queries
43
- selector: link
44
- pattern: '\.css\?(\w*)'
45
- substitute: '\.css'
46
- - title: Strip Drupal JS link queries
47
- selector: script
48
- pattern: '\.js\?(\w*)'
49
- substitute: '\.js'
50
- - title: Strip Drupal View-DOM ID
51
- pattern: 'view-dom-id-\w*'
52
- substitute: 'view-dom-id-_ID_'
53
- - title: Strip Drupal View-DOM ID 2
54
- pattern: '(views?_dom_id"?:"?)\w*'
55
- substitute: '\1_ID_'
56
- - title: Ignore Drupal CSS file names
57
- selector: link
58
- pattern: 'css_[-\w]{40,43}(\\|%5C)?\.css'
59
- substitute: 'css__ID__.css'
60
- - title: Ignore Drupal JS file names
61
- selector: script
62
- pattern: 'js_[-\w]{40,43}\\?\.js'
63
- substitute: 'js__ID__.js'
@@ -1,65 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'sitediff/sanitize/regexp'
4
- require 'pathname'
5
- require 'set'
6
-
7
- class SiteDiff
8
- # Find appropriate rules for a given site
9
- class Rules
10
- def initialize(config, disabled = false)
11
- @disabled = disabled
12
- @config = config
13
- find_sanitization_candidates
14
- @rules = Hash.new { |h, k| h[k] = Set.new }
15
- end
16
-
17
- def find_sanitization_candidates
18
- @candidates = Set.new
19
-
20
- rules_dir = Pathname.new(__FILE__).dirname + 'files' + 'rules'
21
- rules_dir.children.each do |f|
22
- next unless f.file? && f.extname == '.yaml'
23
-
24
- conf = YAML.load_file(f)
25
- @candidates.merge(conf['sanitization'])
26
- end
27
- end
28
-
29
- def handle_page(tag, html, doc)
30
- found = find_rules(html, doc)
31
- @rules[tag].merge(found)
32
- end
33
-
34
- # Yield a set of rules that seem reasonable for this HTML
35
- # assumption: the YAML file is a list of regexp rules only
36
- def find_rules(html, doc)
37
- @candidates.select do |rule|
38
- re = SiteDiff::Sanitizer::Regexp.create(rule)
39
- re.applies?(html, doc)
40
- end
41
- end
42
-
43
- # Find all rules from all rulesets that apply for all pages
44
- def add_config
45
- have_both = @rules.include?(:before)
46
-
47
- r1, r2 = *@rules.values_at(:before, :after)
48
- if have_both
49
- add_section('before', r1 - r2)
50
- add_section('after', r2 - r1)
51
- add_section(nil, r1 & r2)
52
- else
53
- add_section(nil, r2)
54
- end
55
- end
56
-
57
- def add_section(name, rules)
58
- return if rules.empty?
59
-
60
- conf = name ? @config[name] : @config
61
- rules.each { |r| r['disabled'] = true } if @disabled
62
- conf['sanitization'] = rules.to_a.sort_by { |r| r['title'] }
63
- end
64
- end
65
- end