ipynbdiff 0.0.7 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 494a85c40bddb17c1e26f87b0ed2429c880de0ff016dbe5c01a576bb2faeb728
4
- data.tar.gz: 707cda8c1a84919811fdb3502398a3467c4b8224e90a160537c22f8c4bd2b092
3
+ metadata.gz: c0839dd5e5fbcdf19b5525d859fa7d224ec1bb647d7131f9cc20487190ccea80
4
+ data.tar.gz: ca32de2c784712f66f2f1c8b5642b32fca0078eae0279c8b30f975216101d143
5
5
  SHA512:
6
- metadata.gz: efc5a2501aadcddc2c4857ff14bed582324b5e4ed25956791bba4b615d6bb26311edb11823984351d4f880de5d14b8caffee8063dde260b3b9912c77d408e059
7
- data.tar.gz: b3c374a1c6314967d079b754bfbce3a8f8177ccb1f82431dc119e6d91ac4b9045f03b7d8b6998c71435525c447b4e7bae09e1a406f4c5372e098ece1c69d547b
6
+ metadata.gz: 45353d2e38a4378cb5f785edb6b38d250bf38ce4934f8943dcbe08973e4c187942449d185d4a3af59649d9459c74071475b33f23033c192b4345b24f95a2fd78
7
+ data.tar.gz: 7d8df2c34356018dfe098c0f36e8fb8f3c189c8121411826b9923b406ead730856a9e46ca57124b4e4e23e0409e1fdf83c5785686ab6aa0254bcedca07b95246
data/Gemfile CHANGED
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- source "https://rubygems.org"
4
-
5
3
  gem 'diffy', '3.3.0'
6
4
  gem 'json', '2.5.1'
7
5
  gem 'rspec', '3.10.0'
data/Gemfile.lock CHANGED
@@ -1,5 +1,4 @@
1
1
  GEM
2
- remote: https://rubygems.org/
3
2
  specs:
4
3
  diff-lcs (1.4.4)
5
4
  diffy (3.3.0)
@@ -19,7 +18,6 @@ GEM
19
18
  rspec-support (3.10.2)
20
19
 
21
20
  PLATFORMS
22
- ruby
23
21
  x86_64-darwin-20
24
22
 
25
23
  DEPENDENCIES
@@ -28,4 +26,4 @@ DEPENDENCIES
28
26
  rspec (= 3.10.0)
29
27
 
30
28
  BUNDLED WITH
31
- 2.2.29
29
+ 2.2.28
data/README.md CHANGED
@@ -1,58 +1,3 @@
1
- # IpynbDiff: Better diff for Jupyter Notebooks
1
+ # rb-ipynbdiff: Better Jupyter Notebook diffs, in Ruby
2
2
 
3
- This is a simple diff tool that cleans up jupyter notebooks, transforming each [notebook](example/1/from.ipynb)
4
- into a [readable markdown file](example/1/from_html.md), keeping the output of cells, and running the
5
- diff after. Markdowns are generated using an opinionated Jupyter to Markdown conversion. This means
6
- that the entire file is readable on the diff.
7
-
8
- The resulting diffs are much easier to read:
9
-
10
- | Diff | IpynbDiff - HTML | IpynbDiff - Percent |
11
- | ------ | ------ | ------ |
12
- | [Here](example/diff.txt) | [Here](example/ipynbdiff.txt) | [Here](example/ipynbdiff_percent.txt) |
13
- | ![](example/img/diff.png) | ![](example/img/ipynbdiff_html.png) | ![](example/img/ipynbdiff_percent.png) |
14
-
15
-
16
- This started as a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff),
17
- but now has extended functionality, although it does not work as a git driver.
18
-
19
- ## Usage
20
-
21
- ### Generating diffs
22
-
23
- ```ruby
24
- IpynbDiff.diff(from_path, to_path, options)
25
- ```
26
-
27
- Options:
28
-
29
- ```ruby
30
- @default_transform_options = {
31
- preprocess_input: true, # Whether the input should be transformed
32
- write_output_to: nil, # Pass a path to save the output to a file
33
- format: :text, # These are the formats Diffy accepts https://github.com/samg/diffy
34
- sources_are_files: false, # Whether to use the from/to as string or path to a file
35
- raise_if_invalid_notebook: false, # Raises an error if the notebooks are invalid, otherwise returns nil
36
- transform_options: @default_transform_options, # See below for transform options
37
- diff_opts: {
38
- include_diff_info: false # These are passed to Diffy https://github.com/samg/diffy
39
- }
40
- }
41
- ```
42
-
43
- ### Transforming the notebooks
44
-
45
- It might be necessary to have the transformed files in addition to the diff.
46
-
47
- ```ruby
48
- IpynbDiff.transform(notebook, options)
49
- ```
50
-
51
- Options:
52
-
53
- ```ruby
54
- @default_transform_options = {
55
- include_metadata: false, # Whether to include or not the notebook metadata (kernel, language, etc)
56
- cell_decorator: :html # :html is useful to add styling with css, :percent is better for text format
57
- }
58
- ```
3
+ This is a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff)
data/ipynbdiff.gemspec CHANGED
@@ -1,12 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "lib/version"
4
-
5
3
  Gem::Specification.new do |s|
6
4
  s.name = 'ipynbdiff'
7
- s.version = IpynbDiff::VERSION
8
- s.summary = 'Human Readable diffs for Jupyter Notebooks'
9
- s.description = 'Better diff for Jupyter Notebooks by first preprocessing them and removing clutter'
5
+ s.version = '0.3.1'
6
+ s.summary = 'Human Readble diffs for Jupyter Notebooks'
7
+ s.description = 'A simple hello world gem'
10
8
  s.authors = ['Eduardo Bonet']
11
9
  s.email = 'ebonet@gitlab.com'
12
10
  # Specify which files should be added to the gem when it is released.
data/lib/ipynbdiff.rb CHANGED
@@ -2,57 +2,41 @@
2
2
 
3
3
  # Human Readable Jupyter Diffs
4
4
  module IpynbDiff
5
- require 'transformer'
5
+ require 'transformer.rb'
6
6
  require 'diffy'
7
7
 
8
- @default_transform_options = {
9
- include_metadata: false,
10
- cell_decorator: :html
11
- }
12
-
13
- @default_diff_options = {
14
- preprocess_input: true,
8
+ @default_options = {
9
+ preprocess_input: TRUE,
15
10
  write_output_to: nil,
16
11
  format: :text,
17
- sources_are_files: false,
18
- raise_if_invalid_notebook: false,
19
- transform_options: @default_transform_options,
12
+ sources_are_files: FALSE,
20
13
  diff_opts: {
21
- include_diff_info: false
14
+ include_diff_info: FALSE
22
15
  }
23
16
  }.freeze
24
17
 
25
- def self.prepare_input(to_prepare, options)
26
- return '' unless to_prepare
18
+ def self.prepare_input(to_prepare, load_from_file, preprocess)
19
+ prepared = to_prepare
20
+ prepared = File.read(prepared) if load_from_file
21
+ prepared = Transformer.transform(prepared) if preprocess
27
22
 
28
- prep = to_prepare
29
- prep = File.read(prep) if options[:sources_are_files]
30
- prep = transform(prep, raise_errors: true, options: options[:transform_options]) if options[:preprocess_input]
31
- prep
23
+ prepared
32
24
  end
33
25
 
34
26
  def self.diff(
35
27
  from_notebook,
36
28
  to_notebook,
37
- options = @default_diff_options
29
+ options = {}
38
30
  )
39
- options = @default_diff_options.merge(options)
31
+ options = @default_options.merge(options)
40
32
 
41
- from = prepare_input(from_notebook, options)
42
- to = prepare_input(to_notebook, options)
33
+ from = prepare_input(from_notebook, options[:sources_are_files], options[:preprocess_input])
34
+ to = prepare_input(to_notebook, options[:sources_are_files], options[:preprocess_input])
43
35
 
44
36
  d = Diffy::Diff.new(from, to, **options[:diff_opts]).to_s(options[:format])
45
- File.write(options[:write_output_to], d) if options[:write_output_to]
46
- d
47
- rescue InvalidNotebookError
48
- raise if options[:raise_if_invalid_notebook]
49
- end
50
37
 
51
- def self.transform(notebook, raise_errors: false, options: @default_transform_options)
52
- options = @default_transform_options.merge(options)
38
+ File.write(options[:write_output_to], d) if options[:write_output_to]
53
39
 
54
- Transformer.new(**options).transform(notebook)
55
- rescue InvalidNotebookError
56
- raise if raise_errors
40
+ d
57
41
  end
58
42
  end
data/lib/transformer.rb CHANGED
@@ -1,93 +1,93 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module IpynbDiff
4
- class InvalidNotebookError < StandardError
5
- end
6
-
7
4
  # Returns a markdown version of the Jupyter Notebook
8
5
  class Transformer
9
6
  require 'json'
10
7
  require 'yaml'
11
- require 'output_transformer'
12
-
13
- @cell_decorator = :html
14
- @include_metadata = true
15
-
16
8
 
17
- def initialize(include_metadata: true, cell_decorator: :html)
18
- @include_metadata = include_metadata
19
- @cell_decorator = cell_decorator
20
- @output_transformer = OutputTransformer.new
21
- end
22
-
23
- def validate_notebook(notebook)
9
+ def self.transform(notebook, include_metadata: TRUE)
24
10
  notebook_json = JSON.parse(notebook)
11
+ transformed_blocks = notebook_json['cells'].map { |cell| transform_cell(cell, notebook_json) }
12
+ transformed_blocks.prepend(transform_metadata(notebook_json)) if include_metadata
13
+ transformed_blocks.join("\n")
14
+ end
25
15
 
26
- return notebook_json if notebook_json.key?('cells')
27
-
28
- raise InvalidNotebookError
29
- rescue JSON::ParserError
30
- raise InvalidNotebookError
16
+ def self.transform_cell(cell, notebook)
17
+ cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook) : transform_text_cell(cell)
31
18
  end
32
19
 
33
- def transform(notebook)
34
- notebook_json = validate_notebook(notebook)
35
- transformed_blocks = notebook_json['cells'].map do |cell|
36
- decorate_cell(transform_cell(cell, notebook_json), cell)
37
- end
20
+ def self.transform_code_cell(cell, notebook)
21
+ tags = cell['metadata'].fetch('tags', []).join(' ')
38
22
 
39
- transformed_blocks.prepend(transform_metadata(notebook_json)) if @include_metadata
40
- transformed_blocks.join("\n")
23
+ [
24
+ %(<div class="cell code" data-id="#{cell['id']}" data-tags="#{tags}">\n\n),
25
+ %(``` #{notebook['metadata']['kernelspec']['language']}\n),
26
+ *cell['source'],
27
+ "\n```\n",
28
+ *cell['outputs'].map { |output| transform_output(output) },
29
+ "\n</div>\n"
30
+ ].join('')
41
31
  end
42
32
 
43
- def decorate_cell(rows, cell)
44
- tags = cell['metadata']&.fetch('tags', [])
45
- type = cell['cell_type'] || 'raw'
46
-
47
- case @cell_decorator
48
- when :html
49
- rows.prepend(%(<div class="cell #{type}" data-id="#{cell['id']}" data-tags="#{tags&.join(' ')}">\n\n))
50
- .append("\n</div>\n")
51
- when :percent
52
- rows.prepend(%(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}\n\n))
53
- else
54
- rows
55
- end.join('')
33
+ def self.format_traceback(traceback)
34
+ traceback.map do |t|
35
+ t.split("\n").map do |line|
36
+ line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip
37
+ end
38
+ end.join("\n")
56
39
  end
57
40
 
58
- def transform_cell(cell, notebook)
59
- cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook) : transform_text_cell(cell)
41
+ def self.transform_execute_result(output)
42
+ [
43
+ %(\n<div class="output execute_result">\n\n),
44
+ *output['data']['text/plain'].map { |line| " #{line}" },
45
+ "\n\n</div>\n"
46
+ ].join('')
60
47
  end
61
48
 
62
- def decorate_output(output_rows, output)
63
- if @cell_decorator == :html
64
- output_rows.prepend(%(\n<div class="output #{output['output_type']}">\n\n)).append("\n</div>\n")
65
- else
66
- output_rows.prepend(%(\n%%%% Output: #{output['output_type']}\n\n))
49
+ def self.transform_image_result(output)
50
+ if output['data'].key?('image/png')
51
+ [
52
+ %(\n<div class="output display_data">\n\n),
53
+ "![](data:image/png;base64,#{output['data']['image/png'].gsub("\n", '')})",
54
+ "\n\n</div>\n"
55
+ ].join('')
67
56
  end
68
57
  end
69
58
 
70
- def transform_code_cell(cell, notebook)
59
+ def self.transform_error_result(output)
71
60
  [
72
- %(``` #{notebook.dig('metadata', 'kernelspec', 'language') || ''}\n),
73
- *cell['source'],
74
- "\n```\n",
75
- *cell['outputs'].map { |output| transform_output(output) }
76
- ]
61
+ %(\n<div class="output error">\n\n),
62
+ format_traceback(output['traceback']),
63
+ "\n\n</div>\n"
64
+ ].join('')
77
65
  end
78
66
 
79
- def transform_output(output)
80
- transformed = @output_transformer.transform(output)
81
-
82
- decorate_output(transformed, output).join('') if transformed
67
+ def self.transform_output(output)
68
+ case output['output_type']
69
+ when 'execute_result'
70
+ transform_execute_result(output)
71
+ when 'display_data'
72
+ transform_image_result(output)
73
+ when 'error'
74
+ transform_error_result(output)
75
+ end
83
76
  end
84
77
 
85
- def transform_text_cell(cell)
86
- source = cell['source']
87
- (source.is_a?(Array) ? source : [source]).append("\n")
78
+ def self.transform_text_cell(cell)
79
+ tags = cell['metadata'].fetch('tags', []).join(' ')
80
+ id = cell['id']
81
+ cell_type = cell['cell_type']
82
+
83
+ [
84
+ %(<div class="cell #{cell_type}" data-id="#{id}" data-tags="#{tags}">\n\n),
85
+ *cell['source'],
86
+ "\n\n</div>\n"
87
+ ].join('')
88
88
  end
89
89
 
90
- def transform_metadata(notebook_json)
90
+ def self.transform_metadata(notebook_json)
91
91
  {
92
92
  'jupyter' => {
93
93
  'kernelspec' => notebook_json['metadata']['kernelspec'],
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ipynbdiff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo Bonet
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-12-06 00:00:00.000000000 Z
11
+ date: 2021-10-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: diffy
@@ -108,24 +108,19 @@ dependencies:
108
108
  - - ">="
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
- description: Better diff for Jupyter Notebooks by first preprocessing them and removing
112
- clutter
111
+ description: A simple hello world gem
113
112
  email: ebonet@gitlab.com
114
113
  executables: []
115
114
  extensions: []
116
115
  extra_rdoc_files: []
117
116
  files:
118
- - ".VERSION.TMPL"
119
117
  - ".gitignore"
120
- - ".gitlab-ci.yml"
121
118
  - Gemfile
122
119
  - Gemfile.lock
123
120
  - README.md
124
121
  - ipynbdiff.gemspec
125
122
  - lib/ipynbdiff.rb
126
- - lib/output_transformer.rb
127
123
  - lib/transformer.rb
128
- - lib/version.rb
129
124
  homepage: https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff
130
125
  licenses:
131
126
  - MIT
@@ -148,5 +143,5 @@ requirements: []
148
143
  rubygems_version: 3.1.6
149
144
  signing_key:
150
145
  specification_version: 4
151
- summary: Human Readable diffs for Jupyter Notebooks
146
+ summary: Human Readble diffs for Jupyter Notebooks
152
147
  test_files: []
data/.VERSION.TMPL DELETED
@@ -1,5 +0,0 @@
1
- # lib/emoticon/version.rb
2
-
3
- module IpynbDiff
4
- VERSION = "GEM_VERSION"
5
- end
data/.gitlab-ci.yml DELETED
@@ -1,32 +0,0 @@
1
- image: ruby:2.7
2
-
3
- stages:
4
- # - test
5
- # - build
6
- - deploy
7
-
8
- # specs:
9
- # stage: test
10
- # script:
11
- # - bundle install
12
- # - bundle exec rspec
13
-
14
- # build-gem:
15
- # stage: build
16
- # script:
17
- # - bundle install
18
- # - cat .VERSION.TMPL | sed s/GEM_VERSION/0.0.7/ > lib/version.rb
19
- # - gem build ipynbdiff.gemspec
20
- # artifacts:
21
- # paths:
22
- # - ipynbdiff-0.0.7.gem
23
-
24
- deploy-gem:
25
- stage: deploy
26
- script:
27
- - cat .VERSION.TMPL | sed s/GEM_VERSION/0.0.7/ > lib/version.rb
28
- - echo $GEM_HOST_API_KEY
29
- - bundle install
30
- - gem build ipynbdiff.gemspec
31
- - gem push ipynbdiff-0.0.7.gem
32
-
@@ -1,65 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module IpynbDiff
4
-
5
- # Transforms Jupyter output data into markdown
6
- class OutputTransformer
7
-
8
- ORDERED_KEYS = {
9
- 'execute_result' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex text/plain],
10
- 'display_data' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex]
11
- }.freeze
12
-
13
- def transform(output)
14
- case (output_type = output['output_type'])
15
- when 'error'
16
- transform_error(output['traceback'])
17
- when 'execute_result', 'display_data'
18
- transform_non_error(ORDERED_KEYS[output_type], output['data'])
19
- end
20
- end
21
-
22
- def transform_error(traceback)
23
- traceback.map do |t|
24
- t.split("\n").map do |line|
25
- line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip << "\n"
26
- end
27
- end
28
- end
29
-
30
- def transform_non_error(accepted_keys, elements)
31
- accepted_keys.map do |key|
32
- transform_element(key, elements[key]) if elements.key?(key)
33
- end.flatten
34
- end
35
-
36
- def transform_element(output_type, output_element)
37
- case output_type
38
- when 'image/png', 'image/jpeg'
39
- transform_image(output_type, output_element)
40
- when 'image/svg+xml'
41
- transform_svg(output_element)
42
- when 'text/markdown', 'text/latex', 'text/plain'
43
- transform_text(output_element)
44
- end
45
- end
46
-
47
- def transform_image(image_type, image_content)
48
- [" ![](data:#{image_type};base64,#{image_content.gsub("\n", '')})", "\n"]
49
- end
50
-
51
- def transform_svg(image_content)
52
- lines = image_content.is_a?(Array) ? image_content : [image_content]
53
-
54
- single_line = lines.map(&:strip).join('').gsub(/\s+/, ' ')
55
-
56
- [" ![](data:image/svg+xml;utf8,#{single_line})", "\n"]
57
- end
58
-
59
- def transform_text(text_content)
60
- lines = text_content.is_a?(Array) ? text_content : [text_content]
61
-
62
- lines.map { |line| " #{line}" }.append("\n")
63
- end
64
- end
65
- end
data/lib/version.rb DELETED
@@ -1,5 +0,0 @@
1
- # lib/emoticon/version.rb
2
-
3
- module IpynbDiff
4
- VERSION = "0.0.7"
5
- end