ipynbdiff 0.0.7 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 494a85c40bddb17c1e26f87b0ed2429c880de0ff016dbe5c01a576bb2faeb728
4
- data.tar.gz: 707cda8c1a84919811fdb3502398a3467c4b8224e90a160537c22f8c4bd2b092
3
+ metadata.gz: c0839dd5e5fbcdf19b5525d859fa7d224ec1bb647d7131f9cc20487190ccea80
4
+ data.tar.gz: ca32de2c784712f66f2f1c8b5642b32fca0078eae0279c8b30f975216101d143
5
5
  SHA512:
6
- metadata.gz: efc5a2501aadcddc2c4857ff14bed582324b5e4ed25956791bba4b615d6bb26311edb11823984351d4f880de5d14b8caffee8063dde260b3b9912c77d408e059
7
- data.tar.gz: b3c374a1c6314967d079b754bfbce3a8f8177ccb1f82431dc119e6d91ac4b9045f03b7d8b6998c71435525c447b4e7bae09e1a406f4c5372e098ece1c69d547b
6
+ metadata.gz: 45353d2e38a4378cb5f785edb6b38d250bf38ce4934f8943dcbe08973e4c187942449d185d4a3af59649d9459c74071475b33f23033c192b4345b24f95a2fd78
7
+ data.tar.gz: 7d8df2c34356018dfe098c0f36e8fb8f3c189c8121411826b9923b406ead730856a9e46ca57124b4e4e23e0409e1fdf83c5785686ab6aa0254bcedca07b95246
data/Gemfile CHANGED
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- source "https://rubygems.org"
4
-
5
3
  gem 'diffy', '3.3.0'
6
4
  gem 'json', '2.5.1'
7
5
  gem 'rspec', '3.10.0'
data/Gemfile.lock CHANGED
@@ -1,5 +1,4 @@
1
1
  GEM
2
- remote: https://rubygems.org/
3
2
  specs:
4
3
  diff-lcs (1.4.4)
5
4
  diffy (3.3.0)
@@ -19,7 +18,6 @@ GEM
19
18
  rspec-support (3.10.2)
20
19
 
21
20
  PLATFORMS
22
- ruby
23
21
  x86_64-darwin-20
24
22
 
25
23
  DEPENDENCIES
@@ -28,4 +26,4 @@ DEPENDENCIES
28
26
  rspec (= 3.10.0)
29
27
 
30
28
  BUNDLED WITH
31
- 2.2.29
29
+ 2.2.28
data/README.md CHANGED
@@ -1,58 +1,3 @@
1
- # IpynbDiff: Better diff for Jupyter Notebooks
1
+ # rb-ipynbdiff: Better Jupyter Notebook diffs, in Ruby
2
2
 
3
- This is a simple diff tool that cleans up jupyter notebooks, transforming each [notebook](example/1/from.ipynb)
4
- into a [readable markdown file](example/1/from_html.md), keeping the output of cells, and running the
5
- diff after. Markdowns are generated using an opinionated Jupyter to Markdown conversion. This means
6
- that the entire file is readable on the diff.
7
-
8
- The result are diffs that are much easier to read:
9
-
10
- | Diff | IpynbDiff - HTML | IpynbDiff - Percent |
11
- | ------ | ------ | ------ |
12
- | [Here](example/diff.txt) | [Here](example/ipynbdiff.txt) | [Here](example/ipynbdiff_percent.txt) |
13
- | ![](example/img/diff.png) | ![](example/img/ipynbdiff_html.png) | ![](example/img/ipynbdiff_percent.png) |
14
-
15
-
16
- This started as a port of This is a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff),
17
- but now has extended functionality although not working as git driver.
18
-
19
- ## Usage
20
-
21
- ### Generating diffs
22
-
23
- ```ruby
24
- IpynbDiff.diff(from_path, to_path, options)
25
- ```
26
-
27
- Options:
28
-
29
- ```ruby
30
- @default_transform_options = {
31
- preprocess_input: true, # Whether the input should be transformed
32
- write_output_to: nil, # Pass a path to save the output to a file
33
- format: :text, # These are the formats Diffy accepts https://github.com/samg/diffy
34
- sources_are_files: false, # Weather to use the from/to as string or path to a file
35
- raise_if_invalid_notebook: false, # Raises an error if the notebooks are invalid, otherwise returns nil
36
- transform_options: @default_transform_options, # See below for transform options
37
- diff_opts: {
38
- include_diff_info: false # These are passed to Diffy https://github.com/samg/diffy
39
- }
40
- }
41
- ```
42
-
43
- ### Transforming the notebooks
44
-
45
- It might be necessary to have the transformed files in addition to the diff.
46
-
47
- ```ruby
48
- IpynbDiff.transform(notebook, options)
49
- ```
50
-
51
- Options:
52
-
53
- ```ruby
54
- @default_transform_options = {
55
- include_metadata: false, # Whether to include or not the notebook metadata (kernel, language, etc)
56
- cell_decorator: :html # :html is useful to add styling with css, :percent is better for text format
57
- }
58
- ```
3
+ This is a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff)
data/ipynbdiff.gemspec CHANGED
@@ -1,12 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "lib/version"
4
-
5
3
  Gem::Specification.new do |s|
6
4
  s.name = 'ipynbdiff'
7
- s.version = IpynbDiff::VERSION
8
- s.summary = 'Human Readable diffs for Jupyter Notebooks'
9
- s.description = 'Better diff for Jupyter Notebooks by first preprocessing them and removing clutter'
5
+ s.version = '0.3.1'
6
+ s.summary = 'Human Readble diffs for Jupyter Notebooks'
7
+ s.description = 'A simple hello world gem'
10
8
  s.authors = ['Eduardo Bonet']
11
9
  s.email = 'ebonet@gitlab.com'
12
10
  # Specify which files should be added to the gem when it is released.
data/lib/ipynbdiff.rb CHANGED
@@ -2,57 +2,41 @@
2
2
 
3
3
  # Human Readable Jupyter Diffs
4
4
  module IpynbDiff
5
- require 'transformer'
5
+ require 'transformer.rb'
6
6
  require 'diffy'
7
7
 
8
- @default_transform_options = {
9
- include_metadata: false,
10
- cell_decorator: :html
11
- }
12
-
13
- @default_diff_options = {
14
- preprocess_input: true,
8
+ @default_options = {
9
+ preprocess_input: TRUE,
15
10
  write_output_to: nil,
16
11
  format: :text,
17
- sources_are_files: false,
18
- raise_if_invalid_notebook: false,
19
- transform_options: @default_transform_options,
12
+ sources_are_files: FALSE,
20
13
  diff_opts: {
21
- include_diff_info: false
14
+ include_diff_info: FALSE
22
15
  }
23
16
  }.freeze
24
17
 
25
- def self.prepare_input(to_prepare, options)
26
- return '' unless to_prepare
18
+ def self.prepare_input(to_prepare, load_from_file, preprocess)
19
+ prepared = to_prepare
20
+ prepared = File.read(prepared) if load_from_file
21
+ prepared = Transformer.transform(prepared) if preprocess
27
22
 
28
- prep = to_prepare
29
- prep = File.read(prep) if options[:sources_are_files]
30
- prep = transform(prep, raise_errors: true, options: options[:transform_options]) if options[:preprocess_input]
31
- prep
23
+ prepared
32
24
  end
33
25
 
34
26
  def self.diff(
35
27
  from_notebook,
36
28
  to_notebook,
37
- options = @default_diff_options
29
+ options = {}
38
30
  )
39
- options = @default_diff_options.merge(options)
31
+ options = @default_options.merge(options)
40
32
 
41
- from = prepare_input(from_notebook, options)
42
- to = prepare_input(to_notebook, options)
33
+ from = prepare_input(from_notebook, options[:sources_are_files], options[:preprocess_input])
34
+ to = prepare_input(to_notebook, options[:sources_are_files], options[:preprocess_input])
43
35
 
44
36
  d = Diffy::Diff.new(from, to, **options[:diff_opts]).to_s(options[:format])
45
- File.write(options[:write_output_to], d) if options[:write_output_to]
46
- d
47
- rescue InvalidNotebookError
48
- raise if options[:raise_if_invalid_notebook]
49
- end
50
37
 
51
- def self.transform(notebook, raise_errors: false, options: @default_transform_options)
52
- options = @default_transform_options.merge(options)
38
+ File.write(options[:write_output_to], d) if options[:write_output_to]
53
39
 
54
- Transformer.new(**options).transform(notebook)
55
- rescue InvalidNotebookError
56
- raise if raise_errors
40
+ d
57
41
  end
58
42
  end
data/lib/transformer.rb CHANGED
@@ -1,93 +1,93 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module IpynbDiff
4
- class InvalidNotebookError < StandardError
5
- end
6
-
7
4
  # Returns a markdown version of the Jupyter Notebook
8
5
  class Transformer
9
6
  require 'json'
10
7
  require 'yaml'
11
- require 'output_transformer'
12
-
13
- @cell_decorator = :html
14
- @include_metadata = true
15
-
16
8
 
17
- def initialize(include_metadata: true, cell_decorator: :html)
18
- @include_metadata = include_metadata
19
- @cell_decorator = cell_decorator
20
- @output_transformer = OutputTransformer.new
21
- end
22
-
23
- def validate_notebook(notebook)
9
+ def self.transform(notebook, include_metadata: TRUE)
24
10
  notebook_json = JSON.parse(notebook)
11
+ transformed_blocks = notebook_json['cells'].map { |cell| transform_cell(cell, notebook_json) }
12
+ transformed_blocks.prepend(transform_metadata(notebook_json)) if include_metadata
13
+ transformed_blocks.join("\n")
14
+ end
25
15
 
26
- return notebook_json if notebook_json.key?('cells')
27
-
28
- raise InvalidNotebookError
29
- rescue JSON::ParserError
30
- raise InvalidNotebookError
16
+ def self.transform_cell(cell, notebook)
17
+ cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook) : transform_text_cell(cell)
31
18
  end
32
19
 
33
- def transform(notebook)
34
- notebook_json = validate_notebook(notebook)
35
- transformed_blocks = notebook_json['cells'].map do |cell|
36
- decorate_cell(transform_cell(cell, notebook_json), cell)
37
- end
20
+ def self.transform_code_cell(cell, notebook)
21
+ tags = cell['metadata'].fetch('tags', []).join(' ')
38
22
 
39
- transformed_blocks.prepend(transform_metadata(notebook_json)) if @include_metadata
40
- transformed_blocks.join("\n")
23
+ [
24
+ %(<div class="cell code" data-id="#{cell['id']}" data-tags="#{tags}">\n\n),
25
+ %(``` #{notebook['metadata']['kernelspec']['language']}\n),
26
+ *cell['source'],
27
+ "\n```\n",
28
+ *cell['outputs'].map { |output| transform_output(output) },
29
+ "\n</div>\n"
30
+ ].join('')
41
31
  end
42
32
 
43
- def decorate_cell(rows, cell)
44
- tags = cell['metadata']&.fetch('tags', [])
45
- type = cell['cell_type'] || 'raw'
46
-
47
- case @cell_decorator
48
- when :html
49
- rows.prepend(%(<div class="cell #{type}" data-id="#{cell['id']}" data-tags="#{tags&.join(' ')}">\n\n))
50
- .append("\n</div>\n")
51
- when :percent
52
- rows.prepend(%(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}\n\n))
53
- else
54
- rows
55
- end.join('')
33
+ def self.format_traceback(traceback)
34
+ traceback.map do |t|
35
+ t.split("\n").map do |line|
36
+ line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip
37
+ end
38
+ end.join("\n")
56
39
  end
57
40
 
58
- def transform_cell(cell, notebook)
59
- cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook) : transform_text_cell(cell)
41
+ def self.transform_execute_result(output)
42
+ [
43
+ %(\n<div class="output execute_result">\n\n),
44
+ *output['data']['text/plain'].map { |line| " #{line}" },
45
+ "\n\n</div>\n"
46
+ ].join('')
60
47
  end
61
48
 
62
- def decorate_output(output_rows, output)
63
- if @cell_decorator == :html
64
- output_rows.prepend(%(\n<div class="output #{output['output_type']}">\n\n)).append("\n</div>\n")
65
- else
66
- output_rows.prepend(%(\n%%%% Output: #{output['output_type']}\n\n))
49
+ def self.transform_image_result(output)
50
+ if output['data'].key?('image/png')
51
+ [
52
+ %(\n<div class="output display_data">\n\n),
53
+ "![](data:image/png;base64,#{output['data']['image/png'].gsub("\n", '')})",
54
+ "\n\n</div>\n"
55
+ ].join('')
67
56
  end
68
57
  end
69
58
 
70
- def transform_code_cell(cell, notebook)
59
+ def self.transform_error_result(output)
71
60
  [
72
- %(``` #{notebook.dig('metadata', 'kernelspec', 'language') || ''}\n),
73
- *cell['source'],
74
- "\n```\n",
75
- *cell['outputs'].map { |output| transform_output(output) }
76
- ]
61
+ %(\n<div class="output error">\n\n),
62
+ format_traceback(output['traceback']),
63
+ "\n\n</div>\n"
64
+ ].join('')
77
65
  end
78
66
 
79
- def transform_output(output)
80
- transformed = @output_transformer.transform(output)
81
-
82
- decorate_output(transformed, output).join('') if transformed
67
+ def self.transform_output(output)
68
+ case output['output_type']
69
+ when 'execute_result'
70
+ transform_execute_result(output)
71
+ when 'display_data'
72
+ transform_image_result(output)
73
+ when 'error'
74
+ transform_error_result(output)
75
+ end
83
76
  end
84
77
 
85
- def transform_text_cell(cell)
86
- source = cell['source']
87
- (source.is_a?(Array) ? source : [source]).append("\n")
78
+ def self.transform_text_cell(cell)
79
+ tags = cell['metadata'].fetch('tags', []).join(' ')
80
+ id = cell['id']
81
+ cell_type = cell['cell_type']
82
+
83
+ [
84
+ %(<div class="cell #{cell_type}" data-id="#{id}" data-tags="#{tags}">\n\n),
85
+ *cell['source'],
86
+ "\n\n</div>\n"
87
+ ].join('')
88
88
  end
89
89
 
90
- def transform_metadata(notebook_json)
90
+ def self.transform_metadata(notebook_json)
91
91
  {
92
92
  'jupyter' => {
93
93
  'kernelspec' => notebook_json['metadata']['kernelspec'],
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ipynbdiff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo Bonet
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-12-06 00:00:00.000000000 Z
11
+ date: 2021-10-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: diffy
@@ -108,24 +108,19 @@ dependencies:
108
108
  - - ">="
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
- description: Better diff for Jupyter Notebooks by first preprocessing them and removing
112
- clutter
111
+ description: A simple hello world gem
113
112
  email: ebonet@gitlab.com
114
113
  executables: []
115
114
  extensions: []
116
115
  extra_rdoc_files: []
117
116
  files:
118
- - ".VERSION.TMPL"
119
117
  - ".gitignore"
120
- - ".gitlab-ci.yml"
121
118
  - Gemfile
122
119
  - Gemfile.lock
123
120
  - README.md
124
121
  - ipynbdiff.gemspec
125
122
  - lib/ipynbdiff.rb
126
- - lib/output_transformer.rb
127
123
  - lib/transformer.rb
128
- - lib/version.rb
129
124
  homepage: https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff
130
125
  licenses:
131
126
  - MIT
@@ -148,5 +143,5 @@ requirements: []
148
143
  rubygems_version: 3.1.6
149
144
  signing_key:
150
145
  specification_version: 4
151
- summary: Human Readable diffs for Jupyter Notebooks
146
+ summary: Human Readble diffs for Jupyter Notebooks
152
147
  test_files: []
data/.VERSION.TMPL DELETED
@@ -1,5 +0,0 @@
1
- # lib/emoticon/version.rb
2
-
3
- module IpynbDiff
4
- VERSION = "GEM_VERSION"
5
- end
data/.gitlab-ci.yml DELETED
@@ -1,32 +0,0 @@
1
- image: ruby:2.7
2
-
3
- stages:
4
- # - test
5
- # - build
6
- - deploy
7
-
8
- # specs:
9
- # stage: test
10
- # script:
11
- # - bundle install
12
- # - bundle exec rspec
13
-
14
- # build-gem:
15
- # stage: build
16
- # script:
17
- # - bundle install
18
- # - cat .VERSION.TMPL | sed s/GEM_VERSION/0.0.7/ > lib/version.rb
19
- # - gem build ipynbdiff.gemspec
20
- # artifacts:
21
- # paths:
22
- # - ipynbdiff-0.0.7.gem
23
-
24
- deploy-gem:
25
- stage: deploy
26
- script:
27
- - cat .VERSION.TMPL | sed s/GEM_VERSION/0.0.7/ > lib/version.rb
28
- - echo $GEM_HOST_API_KEY
29
- - bundle install
30
- - gem build ipynbdiff.gemspec
31
- - gem push ipynbdiff-0.0.7.gem
32
-
data/lib/output_transformer.rb DELETED
@@ -1,65 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module IpynbDiff
4
-
5
- # Transforms Jupyter output data into markdown
6
- class OutputTransformer
7
-
8
- ORDERED_KEYS = {
9
- 'execute_result' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex text/plain],
10
- 'display_data' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex]
11
- }.freeze
12
-
13
- def transform(output)
14
- case (output_type = output['output_type'])
15
- when 'error'
16
- transform_error(output['traceback'])
17
- when 'execute_result', 'display_data'
18
- transform_non_error(ORDERED_KEYS[output_type], output['data'])
19
- end
20
- end
21
-
22
- def transform_error(traceback)
23
- traceback.map do |t|
24
- t.split("\n").map do |line|
25
- line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip << "\n"
26
- end
27
- end
28
- end
29
-
30
- def transform_non_error(accepted_keys, elements)
31
- accepted_keys.map do |key|
32
- transform_element(key, elements[key]) if elements.key?(key)
33
- end.flatten
34
- end
35
-
36
- def transform_element(output_type, output_element)
37
- case output_type
38
- when 'image/png', 'image/jpeg'
39
- transform_image(output_type, output_element)
40
- when 'image/svg+xml'
41
- transform_svg(output_element)
42
- when 'text/markdown', 'text/latex', 'text/plain'
43
- transform_text(output_element)
44
- end
45
- end
46
-
47
- def transform_image(image_type, image_content)
48
- [" ![](data:#{image_type};base64,#{image_content.gsub("\n", '')})", "\n"]
49
- end
50
-
51
- def transform_svg(image_content)
52
- lines = image_content.is_a?(Array) ? image_content : [image_content]
53
-
54
- single_line = lines.map(&:strip).join('').gsub(/\s+/, ' ')
55
-
56
- [" ![](data:image/svg+xml;utf8,#{single_line})", "\n"]
57
- end
58
-
59
- def transform_text(text_content)
60
- lines = text_content.is_a?(Array) ? text_content : [text_content]
61
-
62
- lines.map { |line| " #{line}" }.append("\n")
63
- end
64
- end
65
- end
data/lib/version.rb DELETED
@@ -1,5 +0,0 @@
1
- # lib/emoticon/version.rb
2
-
3
- module IpynbDiff
4
- VERSION = "0.0.7"
5
- end