ipynbdiff 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 494a85c40bddb17c1e26f87b0ed2429c880de0ff016dbe5c01a576bb2faeb728
4
+ data.tar.gz: 707cda8c1a84919811fdb3502398a3467c4b8224e90a160537c22f8c4bd2b092
5
+ SHA512:
6
+ metadata.gz: efc5a2501aadcddc2c4857ff14bed582324b5e4ed25956791bba4b615d6bb26311edb11823984351d4f880de5d14b8caffee8063dde260b3b9912c77d408e059
7
+ data.tar.gz: b3c374a1c6314967d079b754bfbce3a8f8177ccb1f82431dc119e6d91ac4b9045f03b7d8b6998c71435525c447b4e7bae09e1a406f4c5372e098ece1c69d547b
data/.VERSION.TMPL ADDED
@@ -0,0 +1,5 @@
1
# Template for lib/version.rb.
#
# The CI pipeline substitutes the GEM_VERSION placeholder below with the real
# release number (via sed) and writes the result to lib/version.rb.

module IpynbDiff
  # Frozen so the version string cannot be mutated by callers at runtime.
  VERSION = "GEM_VERSION".freeze
end
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ .tool-versions
2
+ .bundle
3
+
4
+
5
+ *.gem
6
+
data/.gitlab-ci.yml ADDED
@@ -0,0 +1,32 @@
1
image: ruby:2.7

stages:
  # - test
  # - build
  - deploy

# specs:
#   stage: test
#   script:
#     - bundle install
#     - bundle exec rspec

# build-gem:
#   stage: build
#   script:
#     - bundle install
#     - cat .VERSION.TMPL | sed s/GEM_VERSION/0.0.7/ > lib/version.rb
#     - gem build ipynbdiff.gemspec
#   artifacts:
#     paths:
#       - ipynbdiff-0.0.7.gem

# Generates lib/version.rb from the template, builds the gem and pushes it.
deploy-gem:
  stage: deploy
  script:
    - cat .VERSION.TMPL | sed s/GEM_VERSION/0.0.7/ > lib/version.rb
    # Never echo $GEM_HOST_API_KEY: the job log would expose the publishing
    # credential. Only verify that it is present, without printing its value.
    - 'test -n "$GEM_HOST_API_KEY" || { echo "GEM_HOST_API_KEY is not set" >&2; exit 1; }'
    - bundle install
    - gem build ipynbdiff.gemspec
    - gem push ipynbdiff-0.0.7.gem
32
+
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ gem 'diffy', '3.3.0'
6
+ gem 'json', '2.5.1'
7
+ gem 'rspec', '3.10.0'
data/Gemfile.lock ADDED
@@ -0,0 +1,31 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.4.4)
5
+ diffy (3.3.0)
6
+ json (2.5.1)
7
+ rspec (3.10.0)
8
+ rspec-core (~> 3.10.0)
9
+ rspec-expectations (~> 3.10.0)
10
+ rspec-mocks (~> 3.10.0)
11
+ rspec-core (3.10.1)
12
+ rspec-support (~> 3.10.0)
13
+ rspec-expectations (3.10.1)
14
+ diff-lcs (>= 1.2.0, < 2.0)
15
+ rspec-support (~> 3.10.0)
16
+ rspec-mocks (3.10.2)
17
+ diff-lcs (>= 1.2.0, < 2.0)
18
+ rspec-support (~> 3.10.0)
19
+ rspec-support (3.10.2)
20
+
21
+ PLATFORMS
22
+ ruby
23
+ x86_64-darwin-20
24
+
25
+ DEPENDENCIES
26
+ diffy (= 3.3.0)
27
+ json (= 2.5.1)
28
+ rspec (= 3.10.0)
29
+
30
+ BUNDLED WITH
31
+ 2.2.29
data/README.md ADDED
@@ -0,0 +1,58 @@
1
+ # IpynbDiff: Better diff for Jupyter Notebooks
2
+
3
+ This is a simple diff tool that cleans up jupyter notebooks, transforming each [notebook](example/1/from.ipynb)
4
+ into a [readable markdown file](example/1/from_html.md), keeping the output of cells, and running the
5
+ diff after. Markdowns are generated using an opinionated Jupyter to Markdown conversion. This means
6
+ that the entire file is readable on the diff.
7
+
8
+ The result is a diff that is much easier to read:
9
+
10
+ | Diff | IpynbDiff - HTML | IpynbDiff - Percent |
11
+ | ------ | ------ | ------ |
12
+ | [Here](example/diff.txt) | [Here](example/ipynbdiff.txt) | [Here](example/ipynbdiff_percent.txt) |
13
+ | ![](example/img/diff.png) | ![](example/img/ipynbdiff_html.png) | ![](example/img/ipynbdiff_percent.png) |
14
+
15
+
16
+ This started as a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff),
17
+ but now has extended functionality, although it does not work as a git driver.
18
+
19
+ ## Usage
20
+
21
+ ### Generating diffs
22
+
23
+ ```ruby
24
+ IpynbDiff.diff(from_path, to_path, options)
25
+ ```
26
+
27
+ Options:
28
+
29
+ ```ruby
30
+ @default_diff_options = {
31
+ preprocess_input: true, # Whether the input should be transformed
32
+ write_output_to: nil, # Pass a path to save the output to a file
33
+ format: :text, # These are the formats Diffy accepts https://github.com/samg/diffy
34
+ sources_are_files: false, # Whether from/to are paths to files (true) or the notebook contents themselves (false)
35
+ raise_if_invalid_notebook: false, # Raises an error if the notebooks are invalid, otherwise returns nil
36
+ transform_options: @default_transform_options, # See below for transform options
37
+ diff_opts: {
38
+ include_diff_info: false # These are passed to Diffy https://github.com/samg/diffy
39
+ }
40
+ }
41
+ ```
42
+
43
+ ### Transforming the notebooks
44
+
45
+ It might be necessary to have the transformed files in addition to the diff.
46
+
47
+ ```ruby
48
+ IpynbDiff.transform(notebook, options)
49
+ ```
50
+
51
+ Options:
52
+
53
+ ```ruby
54
+ @default_transform_options = {
55
+ include_metadata: false, # Whether to include or not the notebook metadata (kernel, language, etc)
56
+ cell_decorator: :html # :html is useful to add styling with css, :percent is better for text format
57
+ }
58
+ ```
data/ipynbdiff.gemspec ADDED
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/version"
4
+
5
# Gem specification for ipynbdiff.
Gem::Specification.new do |s|
  s.name        = 'ipynbdiff'
  s.version     = IpynbDiff::VERSION
  s.summary     = 'Human Readable diffs for Jupyter Notebooks'
  s.description = 'Better diff for Jupyter Notebooks by first preprocessing them and removing clutter'
  s.authors     = ['Eduardo Bonet']
  s.email       = 'ebonet@gitlab.com'
  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  # Paths under spec/ and example/ are left out of the packaged gem.
  s.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match?(%r{\A(spec|example)/}) }
  end
  s.homepage =
    'https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff'
  s.license = 'MIT'

  s.require_paths = ['lib']

  # Pessimistic constraints instead of exact pins: a library that pins its
  # runtime dependencies to one exact version makes dependency resolution fail
  # for any application that needs a different patch/minor release.
  s.add_runtime_dependency 'diffy', '~> 3.3'
  s.add_runtime_dependency 'json', '~> 2.5', '>= 2.5.1'

  s.add_development_dependency 'bundler', '~> 2.2'
  s.add_development_dependency 'guard-rspec'
  s.add_development_dependency 'pry'
  s.add_development_dependency 'rake'
  s.add_development_dependency 'rspec'
end
data/lib/ipynbdiff.rb ADDED
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
# Human Readable Jupyter Diffs
#
# Public entry points:
# * IpynbDiff.diff      - diffs two notebooks, by default after converting both to markdown
# * IpynbDiff.transform - converts a single notebook to markdown
module IpynbDiff
  # require_relative resolves next to this file, so a different "transformer"
  # file earlier in $LOAD_PATH cannot be picked up by mistake.
  require_relative 'transformer'
  require 'diffy'

  # Defaults handed to Transformer.new. Frozen because this single hash is
  # shared by every call and referenced from @default_diff_options.
  @default_transform_options = {
    include_metadata: false,
    cell_decorator: :html
  }.freeze

  # Defaults for IpynbDiff.diff; caller-supplied options are merged on top.
  @default_diff_options = {
    preprocess_input: true,            # transform the notebooks before diffing
    write_output_to: nil,              # path to also write the diff to
    format: :text,                     # passed to Diffy::Diff#to_s
    sources_are_files: false,          # treat from/to as file paths and read them
    raise_if_invalid_notebook: false,  # re-raise InvalidNotebookError instead of returning nil
    transform_options: @default_transform_options,
    diff_opts: {
      include_diff_info: false         # splatted into Diffy::Diff.new
    }
  }.freeze

  # Turns one side of the diff into the text that is actually compared.
  #
  # @param to_prepare [String, nil] notebook contents, or a file path when
  #   options[:sources_are_files] is set
  # @param options [Hash] merged diff options
  # @return [String] '' when to_prepare is nil/false, otherwise the (optionally
  #   transformed) notebook text
  # @raise [InvalidNotebookError] when preprocessing is enabled and the notebook is invalid
  def self.prepare_input(to_prepare, options)
    return '' unless to_prepare

    prep = to_prepare
    prep = File.read(prep) if options[:sources_are_files]
    if options[:preprocess_input]
      # An explicit `transform_options: nil` falls back to the defaults instead of crashing.
      transform_options = options[:transform_options] || @default_transform_options
      prep = transform(prep, raise_errors: true, options: transform_options)
    end
    prep
  end

  # Diffs two notebooks.
  #
  # @param from_notebook [String, nil] "before" notebook (contents or path, see :sources_are_files)
  # @param to_notebook [String, nil] "after" notebook
  # @param options [Hash] overrides for @default_diff_options
  # @return [String, nil] the diff; nil when a notebook is invalid and
  #   :raise_if_invalid_notebook is not set
  # @raise [InvalidNotebookError] when a notebook is invalid and :raise_if_invalid_notebook is set
  def self.diff(
    from_notebook,
    to_notebook,
    options = @default_diff_options
  )
    options = @default_diff_options.merge(options || {})

    from = prepare_input(from_notebook, options)
    to = prepare_input(to_notebook, options)

    d = Diffy::Diff.new(from, to, **options[:diff_opts]).to_s(options[:format])
    File.write(options[:write_output_to], d) if options[:write_output_to]
    d
  rescue InvalidNotebookError
    raise if options[:raise_if_invalid_notebook]
  end

  # Converts a notebook (JSON string) to markdown.
  #
  # @param notebook [String] notebook JSON
  # @param raise_errors [Boolean] re-raise InvalidNotebookError instead of returning nil
  # @param options [Hash, nil] overrides for @default_transform_options
  # @return [String, nil] the markdown; nil for an invalid notebook when raise_errors is false
  def self.transform(notebook, raise_errors: false, options: @default_transform_options)
    options = @default_transform_options.merge(options || {})

    Transformer.new(**options).transform(notebook)
  rescue InvalidNotebookError
    raise if raise_errors
  end
end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
module IpynbDiff

  # Transforms Jupyter output data into markdown
  class OutputTransformer

    # For each supported output type, the mime types that are rendered, in the
    # order in which they are emitted.
    ORDERED_KEYS = {
      'execute_result' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex text/plain],
      'display_data' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex]
    }.freeze

    # Transforms a single cell output.
    #
    # @param output [Hash] one entry of a code cell's outputs
    # @return [Array, nil] the rendered rows, or nil when the output type is
    #   neither 'error', 'execute_result' nor 'display_data'
    def transform(output)
      case (output_type = output['output_type'])
      when 'error'
        transform_error(output['traceback'])
      when 'execute_result', 'display_data'
        transform_non_error(ORDERED_KEYS[output_type], output['data'])
      end
    end

    # Renders a traceback with terminal escape sequences removed.
    #
    # @param traceback [Array<String>, nil] traceback entries; nil yields []
    # @return [Array<Array<String>>] per entry, its lines, right-stripped and
    #   newline-terminated
    def transform_error(traceback)
      Array(traceback).map do |t|
        t.split("\n").map do |line|
          # Drop colour codes ("[0;31m"), turn the first ESC into a space and
          # remove any remaining ESC characters.
          line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip << "\n"
        end
      end
    end

    # Renders every accepted mime type present in the output data.
    #
    # @param accepted_keys [Array<String>] mime types to render, in order
    # @param elements [Hash, nil] mime type => content; nil is treated as empty
    # @return [Array<String>] flat list of rendered rows (absent keys contribute nothing)
    def transform_non_error(accepted_keys, elements)
      elements ||= {}

      accepted_keys.map do |key|
        transform_element(key, elements[key]) if elements.key?(key)
      end.compact.flatten
    end

    # Dispatches on mime type.
    #
    # @return [Array<String>, nil] rendered rows; nil for an unsupported mime type
    def transform_element(output_type, output_element)
      case output_type
      when 'image/png', 'image/jpeg'
        transform_image(output_type, output_element)
      when 'image/svg+xml'
        transform_svg(output_element)
      when 'text/markdown', 'text/latex', 'text/plain'
        transform_text(output_element)
      end
    end

    # Renders a base64 image as an inline markdown image.
    #
    # @param image_type [String] mime type placed in the data URI
    # @param image_content [String, Array<String>] base64 payload; like the
    #   other mime types it may arrive as a list of lines
    # @return [Array<String>]
    def transform_image(image_type, image_content)
      encoded = as_lines(image_content).join('').delete("\n")

      [" ![](data:#{image_type};base64,#{encoded})", "\n"]
    end

    # Renders an SVG as an inline markdown image, collapsed to a single line.
    #
    # @param image_content [String, Array<String>]
    # @return [Array<String>]
    def transform_svg(image_content)
      single_line = as_lines(image_content).map(&:strip).join('').gsub(/\s+/, ' ')

      [" ![](data:image/svg+xml;utf8,#{single_line})", "\n"]
    end

    # Renders textual output, prefixing every line.
    #
    # @param text_content [String, Array<String>]
    # @return [Array<String>]
    def transform_text(text_content)
      as_lines(text_content).map { |line| " #{line}" }.append("\n")
    end

    private

    # Wraps a scalar in an array; arrays pass through untouched.
    def as_lines(content)
      content.is_a?(Array) ? content : [content]
    end
  end
end
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
module IpynbDiff
  # Raised when the input cannot be used as a Jupyter notebook.
  class InvalidNotebookError < StandardError
  end

  # Returns a markdown version of the Jupyter Notebook
  class Transformer
    require 'json'
    require 'yaml'
    # Resolved relative to this file so it does not depend on $LOAD_PATH.
    require_relative 'output_transformer'

    # @param include_metadata [Boolean] prepend a YAML block with notebook metadata
    # @param cell_decorator [Symbol] :html, :percent, or anything else for undecorated cells
    def initialize(include_metadata: true, cell_decorator: :html)
      @include_metadata = include_metadata
      @cell_decorator = cell_decorator
      @output_transformer = OutputTransformer.new
    end

    # Parses the notebook and checks that it has a list of cells.
    #
    # @param notebook [String] notebook JSON
    # @return [Hash] the parsed notebook
    # @raise [InvalidNotebookError] when the input is not parseable JSON, is not
    #   a JSON object, or has no 'cells' array
    def validate_notebook(notebook)
      notebook_json = JSON.parse(notebook)

      # Valid JSON such as `[]` or `1` is not a notebook; without the Hash
      # check it would surface as NoMethodError instead of InvalidNotebookError.
      return notebook_json if notebook_json.is_a?(Hash) && notebook_json['cells'].is_a?(Array)

      raise InvalidNotebookError
    rescue JSON::ParserError, TypeError
      # TypeError: JSON.parse was handed something that is not a String (e.g. nil).
      raise InvalidNotebookError
    end

    # Converts the notebook to markdown.
    #
    # @param notebook [String] notebook JSON
    # @return [String] transformed cells (optionally preceded by the metadata
    #   block) joined by newlines
    # @raise [InvalidNotebookError] see #validate_notebook
    def transform(notebook)
      notebook_json = validate_notebook(notebook)
      transformed_blocks = notebook_json['cells'].map do |cell|
        decorate_cell(transform_cell(cell, notebook_json), cell)
      end

      transformed_blocks.prepend(transform_metadata(notebook_json)) if @include_metadata
      transformed_blocks.join("\n")
    end

    # Wraps the rows of a cell according to the configured decorator.
    #
    # @param rows [Array<String>] transformed cell rows (modified in place)
    # @param cell [Hash] the notebook cell
    # @return [String] the decorated cell
    def decorate_cell(rows, cell)
      tags = cell['metadata']&.fetch('tags', [])
      type = cell['cell_type'] || 'raw'

      case @cell_decorator
      when :html
        rows.prepend(%(<div class="cell #{type}" data-id="#{cell['id']}" data-tags="#{tags&.join(' ')}">\n\n))
            .append("\n</div>\n")
      when :percent
        rows.prepend(%(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}\n\n))
      else
        rows
      end.join('')
    end

    # Code cells get a fenced block plus outputs; every other cell type is
    # passed through as text.
    def transform_cell(cell, notebook)
      cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook) : transform_text_cell(cell)
    end

    # Wraps output rows: a div for :html, a "%%%% Output" header for any other decorator.
    #
    # @param output_rows [Array] rows from OutputTransformer (modified in place)
    # @param output [Hash] the notebook output the rows came from
    # @return [Array]
    def decorate_output(output_rows, output)
      if @cell_decorator == :html
        output_rows.prepend(%(\n<div class="output #{output['output_type']}">\n\n)).append("\n</div>\n")
      else
        output_rows.prepend(%(\n%%%% Output: #{output['output_type']}\n\n))
      end
    end

    # Renders a code cell: the source inside a fence tagged with the kernel
    # language (when the notebook declares one), followed by its outputs.
    #
    # @return [Array<String, nil>] rows; unsupported outputs contribute nil
    def transform_code_cell(cell, notebook)
      [
        %(``` #{notebook.dig('metadata', 'kernelspec', 'language') || ''}\n),
        *cell['source'],
        "\n```\n",
        # A code cell without an 'outputs' key is rendered without outputs.
        *(cell['outputs'] || []).map { |output| transform_output(output) }
      ]
    end

    # @return [String, nil] the decorated output, or nil when OutputTransformer
    #   does not support the output type
    def transform_output(output)
      transformed = @output_transformer.transform(output)

      decorate_output(transformed, output).join('') if transformed
    end

    # Renders a non-code cell: its source followed by a newline.
    #
    # @return [Array] a new array; the cell's own source is left untouched
    def transform_text_cell(cell)
      source = cell['source']
      lines = source.is_a?(Array) ? source : [source]
      [*lines, "\n"]
    end

    # Builds the YAML block describing the notebook (kernel, language, format
    # version), terminated by a '---' line.
    #
    # @return [String]
    def transform_metadata(notebook_json)
      {
        'jupyter' => {
          # dig: a notebook without a 'metadata' object yields nil entries
          # instead of raising NoMethodError.
          'kernelspec' => notebook_json.dig('metadata', 'kernelspec'),
          'language_info' => notebook_json.dig('metadata', 'language_info'),
          'nbformat' => notebook_json['nbformat'],
          'nbformat_minor' => notebook_json['nbformat_minor']
        }
      }.to_yaml + "---\n"
    end
  end
end
data/lib/version.rb ADDED
@@ -0,0 +1,5 @@
1
# lib/version.rb

module IpynbDiff
  # Frozen so the version string cannot be mutated by callers at runtime.
  VERSION = "0.0.7".freeze
end
metadata ADDED
@@ -0,0 +1,152 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ipynbdiff
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.7
5
+ platform: ruby
6
+ authors:
7
+ - Eduardo Bonet
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2021-12-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: diffy
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 3.3.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 3.3.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: json
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '='
32
+ - !ruby/object:Gem::Version
33
+ version: 2.5.1
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '='
39
+ - !ruby/object:Gem::Version
40
+ version: 2.5.1
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '2.2'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.2'
55
+ - !ruby/object:Gem::Dependency
56
+ name: guard-rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rspec
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ description: Better diff for Jupyter Notebooks by first preprocessing them and removing
112
+ clutter
113
+ email: ebonet@gitlab.com
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - ".VERSION.TMPL"
119
+ - ".gitignore"
120
+ - ".gitlab-ci.yml"
121
+ - Gemfile
122
+ - Gemfile.lock
123
+ - README.md
124
+ - ipynbdiff.gemspec
125
+ - lib/ipynbdiff.rb
126
+ - lib/output_transformer.rb
127
+ - lib/transformer.rb
128
+ - lib/version.rb
129
+ homepage: https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff
130
+ licenses:
131
+ - MIT
132
+ metadata: {}
133
+ post_install_message:
134
+ rdoc_options: []
135
+ require_paths:
136
+ - lib
137
+ required_ruby_version: !ruby/object:Gem::Requirement
138
+ requirements:
139
+ - - ">="
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ required_rubygems_version: !ruby/object:Gem::Requirement
143
+ requirements:
144
+ - - ">="
145
+ - !ruby/object:Gem::Version
146
+ version: '0'
147
+ requirements: []
148
+ rubygems_version: 3.1.6
149
+ signing_key:
150
+ specification_version: 4
151
+ summary: Human Readable diffs for Jupyter Notebooks
152
+ test_files: []