ipynbdiff 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 494a85c40bddb17c1e26f87b0ed2429c880de0ff016dbe5c01a576bb2faeb728
4
+ data.tar.gz: 707cda8c1a84919811fdb3502398a3467c4b8224e90a160537c22f8c4bd2b092
5
+ SHA512:
6
+ metadata.gz: efc5a2501aadcddc2c4857ff14bed582324b5e4ed25956791bba4b615d6bb26311edb11823984351d4f880de5d14b8caffee8063dde260b3b9912c77d408e059
7
+ data.tar.gz: b3c374a1c6314967d079b754bfbce3a8f8177ccb1f82431dc119e6d91ac4b9045f03b7d8b6998c71435525c447b4e7bae09e1a406f4c5372e098ece1c69d547b
data/.VERSION.TMPL ADDED
@@ -0,0 +1,5 @@
1
+ # lib/version.rb
2
+
3
+ module IpynbDiff
4
+ VERSION = "GEM_VERSION"
5
+ end
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ .tool-versions
2
+ .bundle
3
+
4
+
5
+ *.gem
6
+
data/.gitlab-ci.yml ADDED
@@ -0,0 +1,32 @@
1
+ image: ruby:2.7
2
+
3
+ stages:
4
+ # - test
5
+ # - build
6
+ - deploy
7
+
8
+ # specs:
9
+ # stage: test
10
+ # script:
11
+ # - bundle install
12
+ # - bundle exec rspec
13
+
14
+ # build-gem:
15
+ # stage: build
16
+ # script:
17
+ # - bundle install
18
+ # - cat .VERSION.TMPL | sed s/GEM_VERSION/0.0.7/ > lib/version.rb
19
+ # - gem build ipynbdiff.gemspec
20
+ # artifacts:
21
+ # paths:
22
+ # - ipynbdiff-0.0.7.gem
23
+
24
# Builds the gem and publishes it to RubyGems.
# The push credential is supplied through the GEM_HOST_API_KEY CI variable;
# it must never be echoed, because job logs are readable by anyone with
# access to the pipeline.
deploy-gem:
  stage: deploy
  script:
    # Stamp the release version into lib/version.rb, which the gemspec requires.
    - cat .VERSION.TMPL | sed s/GEM_VERSION/0.0.7/ > lib/version.rb
    - bundle install
    - gem build ipynbdiff.gemspec
    - gem push ipynbdiff-0.0.7.gem
32
+
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ gem 'diffy', '3.3.0'
6
+ gem 'json', '2.5.1'
7
+ gem 'rspec', '3.10.0'
data/Gemfile.lock ADDED
@@ -0,0 +1,31 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.4.4)
5
+ diffy (3.3.0)
6
+ json (2.5.1)
7
+ rspec (3.10.0)
8
+ rspec-core (~> 3.10.0)
9
+ rspec-expectations (~> 3.10.0)
10
+ rspec-mocks (~> 3.10.0)
11
+ rspec-core (3.10.1)
12
+ rspec-support (~> 3.10.0)
13
+ rspec-expectations (3.10.1)
14
+ diff-lcs (>= 1.2.0, < 2.0)
15
+ rspec-support (~> 3.10.0)
16
+ rspec-mocks (3.10.2)
17
+ diff-lcs (>= 1.2.0, < 2.0)
18
+ rspec-support (~> 3.10.0)
19
+ rspec-support (3.10.2)
20
+
21
+ PLATFORMS
22
+ ruby
23
+ x86_64-darwin-20
24
+
25
+ DEPENDENCIES
26
+ diffy (= 3.3.0)
27
+ json (= 2.5.1)
28
+ rspec (= 3.10.0)
29
+
30
+ BUNDLED WITH
31
+ 2.2.29
data/README.md ADDED
@@ -0,0 +1,58 @@
1
+ # IpynbDiff: Better diff for Jupyter Notebooks
2
+
3
+ This is a simple diff tool that cleans up jupyter notebooks, transforming each [notebook](example/1/from.ipynb)
4
+ into a [readable markdown file](example/1/from_html.md), keeping the output of cells, and running the
5
+ diff after. Markdowns are generated using an opinionated Jupyter to Markdown conversion. This means
6
+ that the entire file is readable on the diff.
7
+
8
+ The results are diffs that are much easier to read:
9
+
10
+ | Diff | IpynbDiff - HTML | IpynbDiff - Percent |
11
+ | ------ | ------ | ------ |
12
+ | [Here](example/diff.txt) | [Here](example/ipynbdiff.txt) | [Here](example/ipynbdiff_percent.txt) |
13
+ | ![](example/img/diff.png) | ![](example/img/ipynbdiff_html.png) | ![](example/img/ipynbdiff_percent.png) |
14
+
15
+
16
+ This started as a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff),
17
+ but now has extended functionality, although it does not work as a git driver.
18
+
19
+ ## Usage
20
+
21
+ ### Generating diffs
22
+
23
+ ```ruby
24
+ IpynbDiff.diff(from_path, to_path, options)
25
+ ```
26
+
27
+ Options:
28
+
29
+ ```ruby
30
+ @default_diff_options = {
31
+ preprocess_input: true, # Whether the input should be transformed
32
+ write_output_to: nil, # Pass a path to save the output to a file
33
+ format: :text, # These are the formats Diffy accepts https://github.com/samg/diffy
34
+ sources_are_files: false, # Whether from/to are paths to files (true) or the notebook contents as strings (false)
35
+ raise_if_invalid_notebook: false, # Raises an error if the notebooks are invalid, otherwise returns nil
36
+ transform_options: @default_transform_options, # See below for transform options
37
+ diff_opts: {
38
+ include_diff_info: false # These are passed to Diffy https://github.com/samg/diffy
39
+ }
40
+ }
41
+ ```
42
+
43
+ ### Transforming the notebooks
44
+
45
+ It might be necessary to have the transformed files in addition to the diff.
46
+
47
+ ```ruby
48
+ IpynbDiff.transform(notebook, options: options)
49
+ ```
50
+
51
+ Options:
52
+
53
+ ```ruby
54
+ @default_transform_options = {
55
+ include_metadata: false, # Whether to include or not the notebook metadata (kernel, language, etc)
56
+ cell_decorator: :html # :html is useful to add styling with css, :percent is better for text format
57
+ }
58
+ ```
data/ipynbdiff.gemspec ADDED
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/version"
4
+
5
# Gem specification for ipynbdiff. The version is read from IpynbDiff::VERSION.
Gem::Specification.new do |spec|
  spec.name = 'ipynbdiff'
  spec.version = IpynbDiff::VERSION
  spec.summary = 'Human Readable diffs for Jupyter Notebooks'
  spec.description = 'Better diff for Jupyter Notebooks by first preprocessing them and removing clutter'
  spec.authors = ['Eduardo Bonet']
  spec.email = 'ebonet@gitlab.com'
  spec.homepage = 'https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff'
  spec.license = 'MIT'

  # Package every file tracked by git, except the specs and the examples.
  # `git ls-files -z` separates entries with NUL bytes.
  spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject { |path| path.match(%r{^(spec|example)/}) }
  end

  spec.require_paths = ['lib']

  # Runtime dependencies are pinned to exact versions.
  spec.add_runtime_dependency 'diffy', '3.3.0'
  spec.add_runtime_dependency 'json', '2.5.1'

  # Development-only tooling.
  spec.add_development_dependency 'bundler', '~> 2.2'
  %w[guard-rspec pry rake rspec].each do |tool|
    spec.add_development_dependency tool
  end
end
data/lib/ipynbdiff.rb ADDED
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Human Readable Jupyter Diffs
4
# Entry points for producing human readable diffs of Jupyter notebooks.
module IpynbDiff
  require 'transformer'
  require 'diffy'

  # Defaults handed to Transformer.new by .transform.
  @default_transform_options = {
    include_metadata: false,
    cell_decorator: :html
  }

  # Defaults for .diff; caller-supplied options are merged over these.
  @default_diff_options = {
    preprocess_input: true,
    write_output_to: nil,
    format: :text,
    sources_are_files: false,
    raise_if_invalid_notebook: false,
    transform_options: @default_transform_options,
    diff_opts: {
      include_diff_info: false
    }
  }.freeze

  class << self
    # Resolves one side of the diff into the text handed to Diffy.
    #
    # @param to_prepare [String, nil] notebook contents, or a file path when
    #   options[:sources_are_files] is set
    # @param options [Hash] merged diff options
    # @return [String] '' when to_prepare is nil or false; otherwise the
    #   (optionally transformed) notebook text
    # @raise [InvalidNotebookError] when preprocessing an invalid notebook
    def prepare_input(to_prepare, options)
      return '' unless to_prepare

      content = options[:sources_are_files] ? File.read(to_prepare) : to_prepare
      return content unless options[:preprocess_input]

      transform(content, raise_errors: true, options: options[:transform_options])
    end

    # Diffs two notebooks and returns the rendered diff.
    #
    # @param from_notebook [String, nil] original notebook (contents or path)
    # @param to_notebook [String, nil] updated notebook (contents or path)
    # @param options [Hash] overrides merged over the default diff options
    # @return [String, nil] the diff in options[:format]; nil when a notebook
    #   is invalid and options[:raise_if_invalid_notebook] is not set
    # @raise [InvalidNotebookError] when a notebook is invalid and
    #   options[:raise_if_invalid_notebook] is set
    def diff(from_notebook, to_notebook, options = @default_diff_options)
      options = @default_diff_options.merge(options)

      from, to = [from_notebook, to_notebook].map { |source| prepare_input(source, options) }

      rendered = Diffy::Diff.new(from, to, **options[:diff_opts]).to_s(options[:format])

      destination = options[:write_output_to]
      File.write(destination, rendered) if destination

      rendered
    rescue InvalidNotebookError
      raise if options[:raise_if_invalid_notebook]
    end

    # Converts a notebook into its markdown representation.
    #
    # @param notebook [String] notebook JSON
    # @param raise_errors [Boolean] re-raise InvalidNotebookError instead of
    #   returning nil
    # @param options [Hash] overrides merged over the default transform options
    # @return [String, nil] the transformed notebook, or nil when it is invalid
    #   and raise_errors is false
    def transform(notebook, raise_errors: false, options: @default_transform_options)
      settings = @default_transform_options.merge(options)

      Transformer.new(**settings).transform(notebook)
    rescue InvalidNotebookError
      raise if raise_errors
    end
  end
end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
module IpynbDiff
  # Transforms Jupyter output data into markdown
  #
  # Every transform_* method returns an array of string fragments that the
  # caller is expected to join.
  class OutputTransformer
    # MIME types rendered for each supported output type, in rendering order.
    ORDERED_KEYS = {
      'execute_result' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex text/plain],
      'display_data' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex]
    }.freeze

    # Transforms a single cell output.
    #
    # @param output [Hash] one entry of a code cell's 'outputs' list
    # @return [Array, nil] string fragments, or nil when the output type is
    #   not 'error', 'execute_result' or 'display_data'
    def transform(output)
      case (output_type = output['output_type'])
      when 'error'
        transform_error(output['traceback'])
      when 'execute_result', 'display_data'
        transform_non_error(ORDERED_KEYS[output_type], output['data'])
      end
    end

    # Strips terminal colour sequences from a traceback.
    #
    # @param traceback [Array<String>, String, nil] traceback entries; a
    #   missing traceback is treated as empty
    # @return [Array<Array<String>>] one array of newline-terminated lines per
    #   traceback entry
    def transform_error(traceback)
      Array(traceback).map do |entry|
        entry.split("\n").map do |line|
          # Drop the "[..m" part of each colour sequence, turn the first
          # remaining escape character into a space and delete the others.
          cleaned = line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '')
          "#{cleaned.rstrip}\n"
        end
      end
    end

    # Renders every accepted MIME type that is present in the output data.
    #
    # @param accepted_keys [Array<String>] MIME types to render, in order
    # @param elements [Hash, nil] the output's 'data' bundle
    # @return [Array<String>] flattened fragments; empty when there is no data
    def transform_non_error(accepted_keys, elements)
      return [] unless elements.is_a?(Hash)

      accepted_keys
        .select { |key| elements.key?(key) }
        .map { |key| transform_element(key, elements[key]) }
        .flatten
        .compact
    end

    # Dispatches one MIME bundle entry to the matching renderer.
    #
    # @param output_type [String] MIME type of the element
    # @param output_element [String, Array<String>] payload for that MIME type
    # @return [Array<String>, nil] fragments, or nil for an unknown MIME type
    def transform_element(output_type, output_element)
      case output_type
      when 'image/png', 'image/jpeg'
        transform_image(output_type, output_element)
      when 'image/svg+xml'
        transform_svg(output_element)
      when 'text/markdown', 'text/latex', 'text/plain'
        transform_text(output_element)
      end
    end

    # Renders a base64 image as an inline markdown image.
    #
    # @param image_type [String] MIME type used in the data URI
    # @param image_content [String, Array<String>] base64 payload, either as a
    #   single string or split into several lines
    # @return [Array<String>] the image line followed by a newline fragment
    def transform_image(image_type, image_content)
      encoded = Array(image_content).join.delete("\n")

      [" ![](data:#{image_type};base64,#{encoded})", "\n"]
    end

    # Renders an SVG document as an inline markdown image on a single line.
    #
    # @param image_content [String, Array<String>] SVG source
    # @return [Array<String>] the image line followed by a newline fragment
    def transform_svg(image_content)
      lines = image_content.is_a?(Array) ? image_content : [image_content]

      single_line = lines.map(&:strip).join('').gsub(/\s+/, ' ')

      [" ![](data:image/svg+xml;utf8,#{single_line})", "\n"]
    end

    # Prefixes every line of a textual output with a space.
    #
    # @param text_content [String, Array<String>] text payload
    # @return [Array<String>] prefixed lines followed by a newline fragment
    def transform_text(text_content)
      lines = text_content.is_a?(Array) ? text_content : [text_content]

      [*lines.map { |line| " #{line}" }, "\n"]
    end
  end
end
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
module IpynbDiff
  # Raised when the input cannot be read as a Jupyter notebook.
  class InvalidNotebookError < StandardError
  end

  # Returns a markdown version of the Jupyter Notebook
  class Transformer
    require 'json'
    require 'yaml'
    require 'output_transformer'

    # @param include_metadata [Boolean] prepend a YAML block with the notebook
    #   metadata to the result
    # @param cell_decorator [Symbol] :html wraps cells and outputs in <div>
    #   tags, :percent prefixes cells with a "%%" header line; any other value
    #   leaves cells undecorated
    def initialize(include_metadata: true, cell_decorator: :html)
      @include_metadata = include_metadata
      @cell_decorator = cell_decorator
      @output_transformer = OutputTransformer.new
    end

    # Parses the notebook and checks that it has a list of cells.
    #
    # @param notebook [String] notebook JSON
    # @return [Hash] the parsed notebook
    # @raise [InvalidNotebookError] when the input is not a JSON object with
    #   a 'cells' array
    def validate_notebook(notebook)
      notebook_json = JSON.parse(notebook)

      return notebook_json if notebook_json.is_a?(Hash) && notebook_json['cells'].is_a?(Array)

      raise InvalidNotebookError
    rescue JSON::ParserError, TypeError
      # TypeError is what JSON.parse raises for non-string input such as nil.
      raise InvalidNotebookError
    end

    # Converts the notebook into markdown.
    #
    # @param notebook [String] notebook JSON
    # @return [String] decorated cells joined by newlines, preceded by the
    #   metadata block when include_metadata is set
    # @raise [InvalidNotebookError] when the notebook is invalid
    def transform(notebook)
      notebook_json = validate_notebook(notebook)
      transformed_blocks = notebook_json['cells'].map do |cell|
        decorate_cell(transform_cell(cell, notebook_json), cell)
      end

      transformed_blocks.prepend(transform_metadata(notebook_json)) if @include_metadata
      transformed_blocks.join("\n")
    end

    # Adds the cell header (and footer for :html) and joins the rows.
    #
    # @param rows [Array<String>] transformed cell rows; modified in place
    # @param cell [Hash] the notebook cell the rows came from
    # @return [String] the decorated cell
    def decorate_cell(rows, cell)
      tags = cell['metadata']&.fetch('tags', [])
      type = cell['cell_type'] || 'raw'

      case @cell_decorator
      when :html
        rows.prepend(%(<div class="cell #{type}" data-id="#{cell['id']}" data-tags="#{tags&.join(' ')}">\n\n))
            .append("\n</div>\n")
      when :percent
        rows.prepend(%(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}\n\n))
      else
        rows
      end.join('')
    end

    # @param cell [Hash] a notebook cell
    # @param notebook [Hash] the parsed notebook
    # @return [Array<String>] rows for the cell; code cells include outputs
    def transform_cell(cell, notebook)
      cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook) : transform_text_cell(cell)
    end

    # Adds the output header (and footer for :html) around the output rows.
    #
    # @param output_rows [Array] transformed output rows; modified in place
    # @param output [Hash] the cell output the rows came from
    # @return [Array] the decorated rows
    def decorate_output(output_rows, output)
      if @cell_decorator == :html
        output_rows.prepend(%(\n<div class="output #{output['output_type']}">\n\n)).append("\n</div>\n")
      else
        output_rows.prepend(%(\n%%%% Output: #{output['output_type']}\n\n))
      end
    end

    # Renders a code cell as a fenced block followed by its outputs.
    #
    # @param cell [Hash] a code cell; a missing 'outputs' entry is treated as
    #   an empty list
    # @param notebook [Hash] the parsed notebook, used for the kernel language
    # @return [Array<String>] rows for the cell
    def transform_code_cell(cell, notebook)
      # Outputs of an unsupported type transform to nil and are dropped.
      outputs = (cell['outputs'] || []).map { |output| transform_output(output) }.compact

      [
        %(``` #{notebook.dig('metadata', 'kernelspec', 'language') || ''}\n),
        *cell['source'],
        "\n```\n",
        *outputs
      ]
    end

    # @param output [Hash] one entry of a code cell's 'outputs' list
    # @return [String, nil] the decorated output, or nil when the output
    #   transformer does not support its type
    def transform_output(output)
      transformed = @output_transformer.transform(output)

      decorate_output(transformed, output).join('') if transformed
    end

    # Renders a non-code cell: its source followed by a newline.
    #
    # @param cell [Hash] a markdown or raw cell
    # @return [Array] a new array, so the parsed notebook is left untouched
    def transform_text_cell(cell)
      source = cell['source']
      [*(source.is_a?(Array) ? source : [source]), "\n"]
    end

    # Builds the YAML block describing the notebook's kernel and format.
    #
    # @param notebook_json [Hash] the parsed notebook; missing metadata
    #   entries are emitted as null
    # @return [String] a YAML document terminated by "---\n"
    def transform_metadata(notebook_json)
      {
        'jupyter' => {
          'kernelspec' => notebook_json.dig('metadata', 'kernelspec'),
          'language_info' => notebook_json.dig('metadata', 'language_info'),
          'nbformat' => notebook_json['nbformat'],
          'nbformat_minor' => notebook_json['nbformat_minor']
        }
      }.to_yaml + "---\n"
    end
  end
end
data/lib/version.rb ADDED
@@ -0,0 +1,5 @@
1
+ # lib/version.rb
2
+
3
+ module IpynbDiff
4
+ VERSION = "0.0.7"
5
+ end
metadata ADDED
@@ -0,0 +1,152 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ipynbdiff
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.7
5
+ platform: ruby
6
+ authors:
7
+ - Eduardo Bonet
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2021-12-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: diffy
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 3.3.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 3.3.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: json
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '='
32
+ - !ruby/object:Gem::Version
33
+ version: 2.5.1
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '='
39
+ - !ruby/object:Gem::Version
40
+ version: 2.5.1
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '2.2'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.2'
55
+ - !ruby/object:Gem::Dependency
56
+ name: guard-rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rspec
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ description: Better diff for Jupyter Notebooks by first preprocessing them and removing
112
+ clutter
113
+ email: ebonet@gitlab.com
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - ".VERSION.TMPL"
119
+ - ".gitignore"
120
+ - ".gitlab-ci.yml"
121
+ - Gemfile
122
+ - Gemfile.lock
123
+ - README.md
124
+ - ipynbdiff.gemspec
125
+ - lib/ipynbdiff.rb
126
+ - lib/output_transformer.rb
127
+ - lib/transformer.rb
128
+ - lib/version.rb
129
+ homepage: https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff
130
+ licenses:
131
+ - MIT
132
+ metadata: {}
133
+ post_install_message:
134
+ rdoc_options: []
135
+ require_paths:
136
+ - lib
137
+ required_ruby_version: !ruby/object:Gem::Requirement
138
+ requirements:
139
+ - - ">="
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ required_rubygems_version: !ruby/object:Gem::Requirement
143
+ requirements:
144
+ - - ">="
145
+ - !ruby/object:Gem::Version
146
+ version: '0'
147
+ requirements: []
148
+ rubygems_version: 3.1.6
149
+ signing_key:
150
+ specification_version: 4
151
+ summary: Human Readable diffs for Jupyter Notebooks
152
+ test_files: []