ipynbdiff 0.3.2 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e457c9e3908f4ebdf452ec01428e71ca866730a6e36d4af344cb3e703fe989cc
4
- data.tar.gz: 4d12d8975526ad09c6f890af1283c7586773fbd8b2a43abda7d65772ac472818
3
+ metadata.gz: 10f77cf53513157724c6e1d8c8abc4659e3254dd77c3c14ebb2aaabd3155639c
4
+ data.tar.gz: 10815dc6a9cb76c73b4d6c9f712bdb28be9217a2b40efbcf076342e9f5d34811
5
5
  SHA512:
6
- metadata.gz: 3da474c1bad0797b392f51cb33692216d2326822b8654a507849c05d52d4631799c88ccbcd92bc8df1b01f72352b8031e782384f605c9d0abf07dcce9dd4a176
7
- data.tar.gz: ba36b69389d265d8ebd327a66d081688f242614938f51b0c3d644a07502aef1854ebb4804b20572ab9d81c64a7a45c4844b1bba6889ace7988ac85e95130a72d
6
+ metadata.gz: 06e895b990e7099d094b6e78c8a54fc6e5c618b28a9ae266cdb47a127189d67df7a7468f5a96fb2da21529f30911dfca761d7e4b0fc5452fc351e1878c697811
7
+ data.tar.gz: 54ee8fb62e05b130b304989787ee1032ffe41a2dc68dcdfde5bf7480588bdcd3a4f9a20c3b63eeb7cf21dd0ed89527b34d39a7ec15170f7cddda6114ea8fac0a
data/README.md CHANGED
@@ -1,3 +1,58 @@
1
- # rb-ipynbdiff: Better Jupyter Notebook diffs, in Ruby
1
+ # IpynbDiff: Better diff for Jupyter Notebooks
2
2
 
3
- This is a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff)
3
+ This is a simple diff tool that cleans up jupyter notebooks, transforming each [notebook](example/1/from.ipynb)
4
+ into a [readable markdown file](example/1/from_html.md), keeping the output of cells, and running the
5
+ diff after. Markdowns are generated using an opinionated Jupyter to Markdown conversion. This means
6
+ that the entire file is readable on the diff.
7
+
8
+ The resulting diffs are much easier to read:
9
+
10
+ | Diff | IpynbDiff - HTML | IpynbDiff - Percent |
11
+ | ------ | ------ | ------ |
12
+ | [Here](example/diff.txt) | [Here](example/ipynbdiff.txt) | [Here](example/ipynbdiff_percent.txt) |
13
+ | ![](example/img/diff.png) | ![](example/img/ipynbdiff_html.png) | ![](example/img/ipynbdiff_percent.png) |
14
+
15
+
16
+ This started as a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff),
17
+ but now has extended functionality, although it does not work as a git driver.
18
+
19
+ ## Usage
20
+
21
+ ### Generating diffs
22
+
23
+ ```ruby
24
+ IpynbDiff.diff(from_path, to_path, options)
25
+ ```
26
+
27
+ Options:
28
+
29
+ ```ruby
30
+ @default_diff_options = {
31
+ preprocess_input: TRUE, # Whether the input should be transformed
32
+ write_output_to: nil, # Pass a path to save the output to a file
33
+ format: :text, # These are the formats Diffy accepts https://github.com/samg/diffy
34
+ sources_are_files: FALSE, # Whether from/to are paths to files (TRUE) or the notebook contents as strings (FALSE)
35
+ raise_if_invalid_notebook: FALSE, # Raises an error if the notebooks are invalid, otherwise returns nil
36
+ transform_options: @default_transform_options, # See below for transform options
37
+ diff_opts: {
38
+ include_diff_info: FALSE # These are passed to Diffy https://github.com/samg/diffy
39
+ }
40
+ }
41
+ ```
42
+
43
+ ### Transforming the notebooks
44
+
45
+ It might be necessary to have the transformed files in addition to the diff.
46
+
47
+ ```ruby
48
+ IpynbDiff.transform(notebook, options)
49
+ ```
50
+
51
+ Options:
52
+
53
+ ```ruby
54
+ @default_transform_options = {
55
+ include_metadata: FALSE, # Whether to include the notebook metadata (kernel, language, etc.)
56
+ cell_decorator: :html # :html is useful to add styling with css, :percent is better for text format
57
+ }
58
+ ```
data/ipynbdiff.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'ipynbdiff'
5
- s.version = '0.3.2'
5
+ s.version = '0.3.6'
6
6
  s.summary = 'Human Readable diffs for Jupyter Notebooks'
7
7
  s.description = 'Better diff for Jupyter Notebooks by first preprocessing them and removing clutter'
8
8
  s.authors = ['Eduardo Bonet']
data/lib/ipynbdiff.rb CHANGED
@@ -2,42 +2,57 @@
2
2
 
3
3
  # Human Readable Jupyter Diffs
4
4
  module IpynbDiff
5
- require 'transformer.rb'
5
+ require 'transformer'
6
6
  require 'diffy'
7
7
 
8
- @default_options = {
8
+ @default_transform_options = {
9
+ include_metadata: FALSE,
10
+ cell_decorator: :html
11
+ }
12
+
13
+ @default_diff_options = {
9
14
  preprocess_input: TRUE,
10
15
  write_output_to: nil,
11
16
  format: :text,
12
17
  sources_are_files: FALSE,
13
- include_metadata: TRUE,
18
+ raise_if_invalid_notebook: FALSE,
19
+ transform_options: @default_transform_options,
14
20
  diff_opts: {
15
21
  include_diff_info: FALSE
16
22
  }
17
23
  }.freeze
18
24
 
19
25
  def self.prepare_input(to_prepare, options)
20
- prepared = to_prepare
21
- prepared = File.read(prepared) if options[:sources_are_files]
22
- prepared = Transformer.transform(prepared, include_metadata: options[:include_metadata]) if options[:preprocess_input]
26
+ return '' unless to_prepare
23
27
 
24
- prepared
28
+ prep = to_prepare
29
+ prep = File.read(prep) if options[:sources_are_files]
30
+ prep = transform(prep, raise_errors: TRUE, options: options[:transform_options]) if options[:preprocess_input]
31
+ prep
25
32
  end
26
33
 
27
34
  def self.diff(
28
35
  from_notebook,
29
36
  to_notebook,
30
- options = {}
37
+ options = @default_diff_options
31
38
  )
32
- options = @default_options.merge(options)
39
+ options = @default_diff_options.merge(options)
33
40
 
34
- from = from_notebook && prepare_input(from_notebook, options) || ''
35
- to = to_notebook && prepare_input(to_notebook, options) || ''
41
+ from = prepare_input(from_notebook, options)
42
+ to = prepare_input(to_notebook, options)
36
43
 
37
44
  d = Diffy::Diff.new(from, to, **options[:diff_opts]).to_s(options[:format])
38
-
39
45
  File.write(options[:write_output_to], d) if options[:write_output_to]
40
-
41
46
  d
47
+ rescue InvalidNotebookError
48
+ raise if options[:raise_if_invalid_notebook]
49
+ end
50
+
51
+ def self.transform(notebook, raise_errors: FALSE, options: @default_transform_options)
52
+ options = @default_transform_options.merge(options)
53
+
54
+ Transformer.new(**options).transform(notebook)
55
+ rescue InvalidNotebookError
56
+ raise if raise_errors
42
57
  end
43
58
  end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ module IpynbDiff
4
+
5
+ # Transforms Jupyter output data into markdown
6
+ class OutputTransformer
7
+
8
+ ORDERED_KEYS = {
9
+ 'execute_result' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex text/plain],
10
+ 'display_data' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex]
11
+ }.freeze
12
+
13
+ def transform(output)
14
+ case (output_type = output['output_type'])
15
+ when 'error'
16
+ transform_error(output['traceback'])
17
+ when 'execute_result', 'display_data'
18
+ transform_non_error(ORDERED_KEYS[output_type], output['data'])
19
+ end
20
+ end
21
+
22
+ def transform_error(traceback)
23
+ traceback.map do |t|
24
+ t.split("\n").map do |line|
25
+ line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip << "\n"
26
+ end
27
+ end
28
+ end
29
+
30
+ def transform_non_error(accepted_keys, elements)
31
+ accepted_keys.map do |key|
32
+ transform_element(key, elements[key]) if elements.key?(key)
33
+ end.flatten
34
+ end
35
+
36
+ def transform_element(output_type, output_element)
37
+ case output_type
38
+ when 'image/png', 'image/jpeg'
39
+ transform_image(output_type, output_element)
40
+ when 'image/svg+xml'
41
+ transform_svg(output_element)
42
+ when 'text/markdown', 'text/latex', 'text/plain'
43
+ transform_text(output_element)
44
+ end
45
+ end
46
+
47
+ def transform_image(image_type, image_content)
48
+ [" ![](data:#{image_type};base64,#{image_content.gsub("\n", '')})", "\n"]
49
+ end
50
+
51
+ def transform_svg(image_content)
52
+ single_line = image_content.map(&:strip).join('').gsub(/\s+/, ' ')
53
+
54
+ [" ![](data:image/svg+xml;utf8,#{single_line})", "\n"]
55
+ end
56
+
57
+ def transform_text(text_content)
58
+ text_content.map { |line| " #{line}" }.append("\n")
59
+ end
60
+ end
61
+ end
data/lib/transformer.rb CHANGED
@@ -1,93 +1,92 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module IpynbDiff
4
+ class InvalidNotebookError < StandardError
5
+ end
6
+
4
7
  # Returns a markdown version of the Jupyter Notebook
5
8
  class Transformer
6
9
  require 'json'
7
10
  require 'yaml'
11
+ require 'output_transformer'
8
12
 
9
- def self.transform(notebook, include_metadata: TRUE)
10
- notebook_json = JSON.parse(notebook)
11
- transformed_blocks = notebook_json['cells'].map { |cell| transform_cell(cell, notebook_json) }
12
- transformed_blocks.prepend(transform_metadata(notebook_json)) if include_metadata
13
- transformed_blocks.join("\n")
13
+ @cell_decorator = :html
14
+ @include_metadata = TRUE
15
+
16
+
17
+ def initialize(include_metadata: TRUE, cell_decorator: :html)
18
+ @include_metadata = include_metadata
19
+ @cell_decorator = cell_decorator
20
+ @output_transformer = OutputTransformer.new
14
21
  end
15
22
 
16
- def self.transform_cell(cell, notebook)
17
- cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook) : transform_text_cell(cell)
23
+ def validate_notebook(notebook)
24
+ notebook_json = JSON.parse(notebook)
25
+
26
+ return notebook_json if notebook_json.key?('cells') && notebook_json.key?('metadata')
27
+
28
+ raise InvalidNotebookError
29
+ rescue JSON::ParserError
30
+ raise InvalidNotebookError
18
31
  end
19
32
 
20
- def self.transform_code_cell(cell, notebook)
21
- tags = cell['metadata'].fetch('tags', []).join(' ')
33
+ def transform(notebook)
34
+ notebook_json = validate_notebook(notebook)
35
+ transformed_blocks = notebook_json['cells'].map do |cell|
36
+ decorate_cell(transform_cell(cell, notebook_json), cell)
37
+ end
22
38
 
23
- [
24
- %(<div class="cell code" data-id="#{cell['id']}" data-tags="#{tags}">\n\n),
25
- %(``` #{notebook['metadata']['kernelspec']['language']}\n),
26
- *cell['source'],
27
- "\n```\n",
28
- *cell['outputs'].map { |output| transform_output(output) },
29
- "\n</div>\n"
30
- ].join('')
39
+ transformed_blocks.prepend(transform_metadata(notebook_json)) if @include_metadata
40
+ transformed_blocks.join("\n")
31
41
  end
32
42
 
33
- def self.format_traceback(traceback)
34
- traceback.map do |t|
35
- t.split("\n").map do |line|
36
- line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip
37
- end
38
- end.join("\n")
43
+ def decorate_cell(rows, cell)
44
+ tags = cell['metadata']&.fetch('tags', [])
45
+ type = cell['cell_type'] || 'raw'
46
+
47
+ case @cell_decorator
48
+ when :html
49
+ rows.prepend(%(<div class="cell #{type}" data-id="#{cell['id']}" data-tags="#{tags&.join(' ')}">\n\n))
50
+ .append("\n</div>\n")
51
+ when :percent
52
+ rows.prepend(%(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}\n\n))
53
+ else
54
+ rows
55
+ end.join('')
39
56
  end
40
57
 
41
- def self.transform_execute_result(output)
42
- [
43
- %(\n<div class="output execute_result">\n\n),
44
- *output['data']['text/plain'].map { |line| " #{line}" },
45
- "\n\n</div>\n"
46
- ].join('')
58
+ def transform_cell(cell, notebook)
59
+ cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook) : transform_text_cell(cell)
47
60
  end
48
61
 
49
- def self.transform_image_result(output)
50
- if output['data'].key?('image/png')
51
- [
52
- %(\n<div class="output display_data">\n\n),
53
- "![](data:image/png;base64,#{output['data']['image/png'].gsub("\n", '')})",
54
- "\n\n</div>\n"
55
- ].join('')
62
+ def decorate_output(output_rows, output)
63
+ if @cell_decorator == :html
64
+ output_rows.prepend(%(\n<div class="output #{output['output_type']}">\n\n)).append("\n</div>\n")
65
+ else
66
+ output_rows.prepend(%(\n%%%% Output: #{output['output_type']}\n\n))
56
67
  end
57
68
  end
58
69
 
59
- def self.transform_error_result(output)
70
+ def transform_code_cell(cell, notebook)
60
71
  [
61
- %(\n<div class="output error">\n\n),
62
- format_traceback(output['traceback']),
63
- "\n\n</div>\n"
64
- ].join('')
72
+ %(``` #{notebook['metadata']['kernelspec']['language']}\n),
73
+ *cell['source'],
74
+ "\n```\n",
75
+ *cell['outputs'].map { |output| transform_output(output) }
76
+ ]
65
77
  end
66
78
 
67
- def self.transform_output(output)
68
- case output['output_type']
69
- when 'execute_result'
70
- transform_execute_result(output)
71
- when 'display_data'
72
- transform_image_result(output)
73
- when 'error'
74
- transform_error_result(output)
75
- end
76
- end
79
+ def transform_output(output)
80
+ transformed = @output_transformer.transform(output)
77
81
 
78
- def self.transform_text_cell(cell)
79
- tags = cell['metadata'].fetch('tags', []).join(' ')
80
- id = cell['id']
81
- cell_type = cell['cell_type']
82
+ decorate_output(transformed, output).join('') if transformed
83
+ end
82
84
 
83
- [
84
- %(<div class="cell #{cell_type}" data-id="#{id}" data-tags="#{tags}">\n\n),
85
- *cell['source'],
86
- "\n\n</div>\n"
87
- ].join('')
85
+ def transform_text_cell(cell)
86
+ cell['source'].append("\n")
88
87
  end
89
88
 
90
- def self.transform_metadata(notebook_json)
89
+ def transform_metadata(notebook_json)
91
90
  {
92
91
  'jupyter' => {
93
92
  'kernelspec' => notebook_json['metadata']['kernelspec'],
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ipynbdiff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo Bonet
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-13 00:00:00.000000000 Z
11
+ date: 2021-10-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: diffy
@@ -121,6 +121,7 @@ files:
121
121
  - README.md
122
122
  - ipynbdiff.gemspec
123
123
  - lib/ipynbdiff.rb
124
+ - lib/output_transformer.rb
124
125
  - lib/transformer.rb
125
126
  homepage: https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff
126
127
  licenses: