ipynbdiff 0.3.2 → 0.3.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e457c9e3908f4ebdf452ec01428e71ca866730a6e36d4af344cb3e703fe989cc
4
- data.tar.gz: 4d12d8975526ad09c6f890af1283c7586773fbd8b2a43abda7d65772ac472818
3
+ metadata.gz: 10f77cf53513157724c6e1d8c8abc4659e3254dd77c3c14ebb2aaabd3155639c
4
+ data.tar.gz: 10815dc6a9cb76c73b4d6c9f712bdb28be9217a2b40efbcf076342e9f5d34811
5
5
  SHA512:
6
- metadata.gz: 3da474c1bad0797b392f51cb33692216d2326822b8654a507849c05d52d4631799c88ccbcd92bc8df1b01f72352b8031e782384f605c9d0abf07dcce9dd4a176
7
- data.tar.gz: ba36b69389d265d8ebd327a66d081688f242614938f51b0c3d644a07502aef1854ebb4804b20572ab9d81c64a7a45c4844b1bba6889ace7988ac85e95130a72d
6
+ metadata.gz: 06e895b990e7099d094b6e78c8a54fc6e5c618b28a9ae266cdb47a127189d67df7a7468f5a96fb2da21529f30911dfca761d7e4b0fc5452fc351e1878c697811
7
+ data.tar.gz: 54ee8fb62e05b130b304989787ee1032ffe41a2dc68dcdfde5bf7480588bdcd3a4f9a20c3b63eeb7cf21dd0ed89527b34d39a7ec15170f7cddda6114ea8fac0a
data/README.md CHANGED
@@ -1,3 +1,58 @@
1
- # rb-ipynbdiff: Better Jupyter Notebook diffs, in Ruby
1
+ # IpynbDiff: Better diff for Jupyter Notebooks
2
2
 
3
- This is a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff)
3
+ This is a simple diff tool that cleans up jupyter notebooks, transforming each [notebook](example/1/from.ipynb)
4
+ into a [readable markdown file](example/1/from_html.md), keeping the output of cells, and running the
5
+ diff after. Markdowns are generated using an opinionated Jupyter to Markdown conversion. This means
6
+ that the entire file is readable on the diff.
7
+
8
+ The results are diffs that are much easier to read:
9
+
10
+ | Diff | IpynbDiff - HTML | IpynbDiff - Percent |
11
+ | ------ | ------ | ------ |
12
+ | [Here](example/diff.txt) | [Here](example/ipynbdiff.txt) | [Here](example/ipynbdiff_percent.txt) |
13
+ | ![](example/img/diff.png) | ![](example/img/ipynbdiff_html.png) | ![](example/img/ipynbdiff_percent.png) |
14
+
15
+
16
+ This started as a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff),
17
+ but now has extended functionality, although it does not work as a git driver.
18
+
19
+ ## Usage
20
+
21
+ ### Generating diffs
22
+
23
+ ```ruby
24
+ IpynbDiff.diff(from_path, to_path, options)
25
+ ```
26
+
27
+ Options:
28
+
29
+ ```ruby
30
+ @default_diff_options = {
31
+ preprocess_input: TRUE, # Whether the input should be transformed
32
+ write_output_to: nil, # Pass a path to save the output to a file
33
+ format: :text, # These are the formats Diffy accepts https://github.com/samg/diffy
34
+ sources_are_files: FALSE, # Whether to treat from/to as paths to files instead of notebook strings
35
+ raise_if_invalid_notebook: FALSE, # Raises an error if the notebooks are invalid, otherwise returns nil
36
+ transform_options: @default_transform_options, # See below for transform options
37
+ diff_opts: {
38
+ include_diff_info: FALSE # These are passed to Diffy https://github.com/samg/diffy
39
+ }
40
+ }
41
+ ```
42
+
43
+ ### Transforming the notebooks
44
+
45
+ It might be necessary to have the transformed files in addition to the diff.
46
+
47
+ ```ruby
48
+ IpynbDiff.transform(notebook, options)
49
+ ```
50
+
51
+ Options:
52
+
53
+ ```ruby
54
+ @default_transform_options = {
55
+ include_metadata: FALSE, # Whether to include the notebook metadata (kernel, language, etc)
56
+ cell_decorator: :html # :html is useful to add styling with css, :percent is better for text format
57
+ }
58
+ ```
data/ipynbdiff.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'ipynbdiff'
5
- s.version = '0.3.2'
5
+ s.version = '0.3.6'
6
6
  s.summary = 'Human Readable diffs for Jupyter Notebooks'
7
7
  s.description = 'Better diff for Jupyter Notebooks by first preprocessing them and removing clutter'
8
8
  s.authors = ['Eduardo Bonet']
data/lib/ipynbdiff.rb CHANGED
@@ -2,42 +2,57 @@
2
2
 
3
3
  # Human Readable Jupyter Diffs
4
4
  module IpynbDiff
5
- require 'transformer.rb'
5
+ require 'transformer'
6
6
  require 'diffy'
7
7
 
8
- @default_options = {
8
+ @default_transform_options = {
9
+ include_metadata: FALSE,
10
+ cell_decorator: :html
11
+ }
12
+
13
+ @default_diff_options = {
9
14
  preprocess_input: TRUE,
10
15
  write_output_to: nil,
11
16
  format: :text,
12
17
  sources_are_files: FALSE,
13
- include_metadata: TRUE,
18
+ raise_if_invalid_notebook: FALSE,
19
+ transform_options: @default_transform_options,
14
20
  diff_opts: {
15
21
  include_diff_info: FALSE
16
22
  }
17
23
  }.freeze
18
24
 
19
25
  def self.prepare_input(to_prepare, options)
20
- prepared = to_prepare
21
- prepared = File.read(prepared) if options[:sources_are_files]
22
- prepared = Transformer.transform(prepared, include_metadata: options[:include_metadata]) if options[:preprocess_input]
26
+ return '' unless to_prepare
23
27
 
24
- prepared
28
+ prep = to_prepare
29
+ prep = File.read(prep) if options[:sources_are_files]
30
+ prep = transform(prep, raise_errors: TRUE, options: options[:transform_options]) if options[:preprocess_input]
31
+ prep
25
32
  end
26
33
 
27
34
  def self.diff(
28
35
  from_notebook,
29
36
  to_notebook,
30
- options = {}
37
+ options = @default_diff_options
31
38
  )
32
- options = @default_options.merge(options)
39
+ options = @default_diff_options.merge(options)
33
40
 
34
- from = from_notebook && prepare_input(from_notebook, options) || ''
35
- to = to_notebook && prepare_input(to_notebook, options) || ''
41
+ from = prepare_input(from_notebook, options)
42
+ to = prepare_input(to_notebook, options)
36
43
 
37
44
  d = Diffy::Diff.new(from, to, **options[:diff_opts]).to_s(options[:format])
38
-
39
45
  File.write(options[:write_output_to], d) if options[:write_output_to]
40
-
41
46
  d
47
+ rescue InvalidNotebookError
48
+ raise if options[:raise_if_invalid_notebook]
49
+ end
50
+
51
+ def self.transform(notebook, raise_errors: FALSE, options: @default_transform_options)
52
+ options = @default_transform_options.merge(options)
53
+
54
+ Transformer.new(**options).transform(notebook)
55
+ rescue InvalidNotebookError
56
+ raise if raise_errors
42
57
  end
43
58
  end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ module IpynbDiff
4
+
5
+ # Transforms Jupyter output data into markdown
6
+ class OutputTransformer
7
+
8
+ ORDERED_KEYS = {
9
+ 'execute_result' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex text/plain],
10
+ 'display_data' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex]
11
+ }.freeze
12
+
13
+ def transform(output)
14
+ case (output_type = output['output_type'])
15
+ when 'error'
16
+ transform_error(output['traceback'])
17
+ when 'execute_result', 'display_data'
18
+ transform_non_error(ORDERED_KEYS[output_type], output['data'])
19
+ end
20
+ end
21
+
22
+ def transform_error(traceback)
23
+ traceback.map do |t|
24
+ t.split("\n").map do |line|
25
+ line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip << "\n"
26
+ end
27
+ end
28
+ end
29
+
30
+ def transform_non_error(accepted_keys, elements)
31
+ accepted_keys.map do |key|
32
+ transform_element(key, elements[key]) if elements.key?(key)
33
+ end.flatten
34
+ end
35
+
36
+ def transform_element(output_type, output_element)
37
+ case output_type
38
+ when 'image/png', 'image/jpeg'
39
+ transform_image(output_type, output_element)
40
+ when 'image/svg+xml'
41
+ transform_svg(output_element)
42
+ when 'text/markdown', 'text/latex', 'text/plain'
43
+ transform_text(output_element)
44
+ end
45
+ end
46
+
47
+ def transform_image(image_type, image_content)
48
+ [" ![](data:#{image_type};base64,#{image_content.gsub("\n", '')})", "\n"]
49
+ end
50
+
51
+ def transform_svg(image_content)
52
+ single_line = image_content.map(&:strip).join('').gsub(/\s+/, ' ')
53
+
54
+ [" ![](data:image/svg+xml;utf8,#{single_line})", "\n"]
55
+ end
56
+
57
+ def transform_text(text_content)
58
+ text_content.map { |line| " #{line}" }.append("\n")
59
+ end
60
+ end
61
+ end
data/lib/transformer.rb CHANGED
@@ -1,93 +1,92 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module IpynbDiff
4
+ class InvalidNotebookError < StandardError
5
+ end
6
+
4
7
  # Returns a markdown version of the Jupyter Notebook
5
8
  class Transformer
6
9
  require 'json'
7
10
  require 'yaml'
11
+ require 'output_transformer'
8
12
 
9
- def self.transform(notebook, include_metadata: TRUE)
10
- notebook_json = JSON.parse(notebook)
11
- transformed_blocks = notebook_json['cells'].map { |cell| transform_cell(cell, notebook_json) }
12
- transformed_blocks.prepend(transform_metadata(notebook_json)) if include_metadata
13
- transformed_blocks.join("\n")
13
+ @cell_decorator = :html
14
+ @include_metadata = TRUE
15
+
16
+
17
+ def initialize(include_metadata: TRUE, cell_decorator: :html)
18
+ @include_metadata = include_metadata
19
+ @cell_decorator = cell_decorator
20
+ @output_transformer = OutputTransformer.new
14
21
  end
15
22
 
16
- def self.transform_cell(cell, notebook)
17
- cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook) : transform_text_cell(cell)
23
+ def validate_notebook(notebook)
24
+ notebook_json = JSON.parse(notebook)
25
+
26
+ return notebook_json if notebook_json.key?('cells') && notebook_json.key?('metadata')
27
+
28
+ raise InvalidNotebookError
29
+ rescue JSON::ParserError
30
+ raise InvalidNotebookError
18
31
  end
19
32
 
20
- def self.transform_code_cell(cell, notebook)
21
- tags = cell['metadata'].fetch('tags', []).join(' ')
33
+ def transform(notebook)
34
+ notebook_json = validate_notebook(notebook)
35
+ transformed_blocks = notebook_json['cells'].map do |cell|
36
+ decorate_cell(transform_cell(cell, notebook_json), cell)
37
+ end
22
38
 
23
- [
24
- %(<div class="cell code" data-id="#{cell['id']}" data-tags="#{tags}">\n\n),
25
- %(``` #{notebook['metadata']['kernelspec']['language']}\n),
26
- *cell['source'],
27
- "\n```\n",
28
- *cell['outputs'].map { |output| transform_output(output) },
29
- "\n</div>\n"
30
- ].join('')
39
+ transformed_blocks.prepend(transform_metadata(notebook_json)) if @include_metadata
40
+ transformed_blocks.join("\n")
31
41
  end
32
42
 
33
- def self.format_traceback(traceback)
34
- traceback.map do |t|
35
- t.split("\n").map do |line|
36
- line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip
37
- end
38
- end.join("\n")
43
+ def decorate_cell(rows, cell)
44
+ tags = cell['metadata']&.fetch('tags', [])
45
+ type = cell['cell_type'] || 'raw'
46
+
47
+ case @cell_decorator
48
+ when :html
49
+ rows.prepend(%(<div class="cell #{type}" data-id="#{cell['id']}" data-tags="#{tags&.join(' ')}">\n\n))
50
+ .append("\n</div>\n")
51
+ when :percent
52
+ rows.prepend(%(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}\n\n))
53
+ else
54
+ rows
55
+ end.join('')
39
56
  end
40
57
 
41
- def self.transform_execute_result(output)
42
- [
43
- %(\n<div class="output execute_result">\n\n),
44
- *output['data']['text/plain'].map { |line| " #{line}" },
45
- "\n\n</div>\n"
46
- ].join('')
58
+ def transform_cell(cell, notebook)
59
+ cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook) : transform_text_cell(cell)
47
60
  end
48
61
 
49
- def self.transform_image_result(output)
50
- if output['data'].key?('image/png')
51
- [
52
- %(\n<div class="output display_data">\n\n),
53
- "![](data:image/png;base64,#{output['data']['image/png'].gsub("\n", '')})",
54
- "\n\n</div>\n"
55
- ].join('')
62
+ def decorate_output(output_rows, output)
63
+ if @cell_decorator == :html
64
+ output_rows.prepend(%(\n<div class="output #{output['output_type']}">\n\n)).append("\n</div>\n")
65
+ else
66
+ output_rows.prepend(%(\n%%%% Output: #{output['output_type']}\n\n))
56
67
  end
57
68
  end
58
69
 
59
- def self.transform_error_result(output)
70
+ def transform_code_cell(cell, notebook)
60
71
  [
61
- %(\n<div class="output error">\n\n),
62
- format_traceback(output['traceback']),
63
- "\n\n</div>\n"
64
- ].join('')
72
+ %(``` #{notebook['metadata']['kernelspec']['language']}\n),
73
+ *cell['source'],
74
+ "\n```\n",
75
+ *cell['outputs'].map { |output| transform_output(output) }
76
+ ]
65
77
  end
66
78
 
67
- def self.transform_output(output)
68
- case output['output_type']
69
- when 'execute_result'
70
- transform_execute_result(output)
71
- when 'display_data'
72
- transform_image_result(output)
73
- when 'error'
74
- transform_error_result(output)
75
- end
76
- end
79
+ def transform_output(output)
80
+ transformed = @output_transformer.transform(output)
77
81
 
78
- def self.transform_text_cell(cell)
79
- tags = cell['metadata'].fetch('tags', []).join(' ')
80
- id = cell['id']
81
- cell_type = cell['cell_type']
82
+ decorate_output(transformed, output).join('') if transformed
83
+ end
82
84
 
83
- [
84
- %(<div class="cell #{cell_type}" data-id="#{id}" data-tags="#{tags}">\n\n),
85
- *cell['source'],
86
- "\n\n</div>\n"
87
- ].join('')
85
+ def transform_text_cell(cell)
86
+ cell['source'].append("\n")
88
87
  end
89
88
 
90
- def self.transform_metadata(notebook_json)
89
+ def transform_metadata(notebook_json)
91
90
  {
92
91
  'jupyter' => {
93
92
  'kernelspec' => notebook_json['metadata']['kernelspec'],
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ipynbdiff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.3.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo Bonet
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-13 00:00:00.000000000 Z
11
+ date: 2021-10-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: diffy
@@ -121,6 +121,7 @@ files:
121
121
  - README.md
122
122
  - ipynbdiff.gemspec
123
123
  - lib/ipynbdiff.rb
124
+ - lib/output_transformer.rb
124
125
  - lib/transformer.rb
125
126
  homepage: https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff
126
127
  licenses: