ipynbdiff 0.3.1 → 0.3.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c0839dd5e5fbcdf19b5525d859fa7d224ec1bb647d7131f9cc20487190ccea80
4
- data.tar.gz: ca32de2c784712f66f2f1c8b5642b32fca0078eae0279c8b30f975216101d143
3
+ metadata.gz: 54c011ed4790dc548b33d6e4773bb9bbdaedd7b8c1ffbdbcfa5e494e5380b1a9
4
+ data.tar.gz: 70f78c722ddc432576ec52d55617e4ce5bdf29cb7ff32ad46219ac52d0d6f5d5
5
5
  SHA512:
6
- metadata.gz: 45353d2e38a4378cb5f785edb6b38d250bf38ce4934f8943dcbe08973e4c187942449d185d4a3af59649d9459c74071475b33f23033c192b4345b24f95a2fd78
7
- data.tar.gz: 7d8df2c34356018dfe098c0f36e8fb8f3c189c8121411826b9923b406ead730856a9e46ca57124b4e4e23e0409e1fdf83c5785686ab6aa0254bcedca07b95246
6
+ metadata.gz: 757f9ea284f358771835db2e27ebe90cb5c8c99336313a65d04cbd0080c0891e1c25af79740cb4d191eeafec10e66c74baceef1ba24729b6ba6f743d502564fa
7
+ data.tar.gz: 35866e056ae8f4ab043bd9cc247f6c3f8dc3b02d475e94221ca11352badf1e96cf840e8020553ffa8c32b1359f967c74ccc67e0f4abda376e8a035d027871daf
data/README.md CHANGED
@@ -1,3 +1,58 @@
1
- # rb-ipynbdiff: Better Jupyter Notebook diffs, in Ruby
1
+ # IpynbDiff: Better diff for Jupyter Notebooks
2
2
 
3
- This is a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff)
3
+ This is a simple diff tool that cleans up Jupyter notebooks, transforming each [notebook](example/1/from.ipynb)
4
+ into a [readable markdown file](example/1/from_html.md), keeping the output of cells, and running the
5
+ diff after. The Markdown is generated using an opinionated Jupyter-to-Markdown conversion. This means
6
+ that the entire file is readable on the diff.
7
+
8
+ The resulting diffs are much easier to read:
9
+
10
+ | Diff | IpynbDiff - HTML | IpynbDiff - Percent |
11
+ | ------ | ------ | ------ |
12
+ | [Here](example/diff.txt) | [Here](example/ipynbdiff.txt) | [Here](example/ipynbdiff_percent.txt) |
13
+ | ![](example/img/diff.png) | ![](example/img/ipynbdiff_html.png) | ![](example/img/ipynbdiff_percent.png) |
14
+
15
+
16
+ This started as a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff),
17
+ but now has extended functionality, although it does not work as a git driver.
18
+
19
+ ## Usage
20
+
21
+ ### Generating diffs
22
+
23
+ ```ruby
24
+ IpynbDiff.diff(from_path, to_path, options)
25
+ ```
26
+
27
+ Options:
28
+
29
+ ```ruby
30
+ @default_diff_options = {
31
+ preprocess_input: TRUE, # Whether the input should be transformed
32
+ write_output_to: nil, # Pass a path to save the output to a file
33
+ format: :text, # These are the formats Diffy accepts https://github.com/samg/diffy
34
+ sources_are_files: FALSE, # Whether from/to are paths to files (TRUE) or the notebook contents as strings (FALSE)
35
+ raise_if_invalid_notebook: FALSE, # Raises an error if the notebooks are invalid, otherwise returns nil
36
+ transform_options: @default_transform_options, # See below for transform options
37
+ diff_opts: {
38
+ include_diff_info: FALSE # These are passed to Diffy https://github.com/samg/diffy
39
+ }
40
+ }
41
+ ```
42
+
43
+ ### Transforming the notebooks
44
+
45
+ It might be necessary to have the transformed files in addition to the diff.
46
+
47
+ ```ruby
48
+ IpynbDiff.transform(notebook, options)
49
+ ```
50
+
51
+ Options:
52
+
53
+ ```ruby
54
+ @default_transform_options = {
55
+ include_metadata: FALSE, # Whether to include the notebook metadata (kernel, language, etc)
56
+ cell_decorator: :html # :html is useful to add styling with css, :percent is better for text format
57
+ }
58
+ ```
data/ipynbdiff.gemspec CHANGED
@@ -2,9 +2,9 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'ipynbdiff'
5
- s.version = '0.3.1'
6
- s.summary = 'Human Readble diffs for Jupyter Notebooks'
7
- s.description = 'A simple hello world gem'
5
+ s.version = '0.3.5'
6
+ s.summary = 'Human Readable diffs for Jupyter Notebooks'
7
+ s.description = 'Better diff for Jupyter Notebooks by first preprocessing them and removing clutter'
8
8
  s.authors = ['Eduardo Bonet']
9
9
  s.email = 'ebonet@gitlab.com'
10
10
  # Specify which files should be added to the gem when it is released.
data/lib/ipynbdiff.rb CHANGED
@@ -2,41 +2,57 @@
2
2
 
3
3
  # Human Readable Jupyter Diffs
4
4
  module IpynbDiff
5
- require 'transformer.rb'
5
+ require 'transformer'
6
6
  require 'diffy'
7
7
 
8
- @default_options = {
8
+ @default_transform_options = {
9
+ include_metadata: FALSE,
10
+ cell_decorator: :html
11
+ }
12
+
13
+ @default_diff_options = {
9
14
  preprocess_input: TRUE,
10
15
  write_output_to: nil,
11
16
  format: :text,
12
17
  sources_are_files: FALSE,
18
+ raise_if_invalid_notebook: FALSE,
19
+ transform_options: @default_transform_options,
13
20
  diff_opts: {
14
21
  include_diff_info: FALSE
15
22
  }
16
23
  }.freeze
17
24
 
18
- def self.prepare_input(to_prepare, load_from_file, preprocess)
19
- prepared = to_prepare
20
- prepared = File.read(prepared) if load_from_file
21
- prepared = Transformer.transform(prepared) if preprocess
25
+ def self.prepare_input(to_prepare, options)
26
+ return '' unless to_prepare
22
27
 
23
- prepared
28
+ prep = to_prepare
29
+ prep = File.read(prep) if options[:sources_are_files]
30
+ prep = transform(prep, raise_errors: TRUE, options: options[:transform_options]) if options[:preprocess_input]
31
+ prep
24
32
  end
25
33
 
26
34
  def self.diff(
27
35
  from_notebook,
28
36
  to_notebook,
29
- options = {}
37
+ options = @default_diff_options
30
38
  )
31
- options = @default_options.merge(options)
39
+ options = @default_diff_options.merge(options)
32
40
 
33
- from = prepare_input(from_notebook, options[:sources_are_files], options[:preprocess_input])
34
- to = prepare_input(to_notebook, options[:sources_are_files], options[:preprocess_input])
41
+ from = prepare_input(from_notebook, options)
42
+ to = prepare_input(to_notebook, options)
35
43
 
36
44
  d = Diffy::Diff.new(from, to, **options[:diff_opts]).to_s(options[:format])
37
-
38
45
  File.write(options[:write_output_to], d) if options[:write_output_to]
39
-
40
46
  d
47
+ rescue InvalidNotebookError
48
+ raise if options[:raise_if_invalid_notebook]
49
+ end
50
+
51
+ def self.transform(notebook, raise_errors: FALSE, options: @default_transform_options)
52
+ options = @default_transform_options.merge(options)
53
+
54
+ Transformer.new(**options).transform(notebook)
55
+ rescue InvalidNotebookError
56
+ raise if raise_errors
41
57
  end
42
58
  end
data/lib/transformer.rb CHANGED
@@ -1,93 +1,119 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module IpynbDiff
4
+ class InvalidNotebookError < StandardError
5
+ end
6
+
4
7
  # Returns a markdown version of the Jupyter Notebook
5
8
  class Transformer
6
9
  require 'json'
7
10
  require 'yaml'
8
11
 
9
- def self.transform(notebook, include_metadata: TRUE)
12
+ @cell_decorator = :html
13
+ @include_metadata = TRUE
14
+
15
+ def initialize(include_metadata: TRUE, cell_decorator: :html)
16
+ @include_metadata = include_metadata
17
+ @cell_decorator = cell_decorator
18
+ end
19
+
20
+ def validate_notebook(notebook)
10
21
  notebook_json = JSON.parse(notebook)
11
- transformed_blocks = notebook_json['cells'].map { |cell| transform_cell(cell, notebook_json) }
12
- transformed_blocks.prepend(transform_metadata(notebook_json)) if include_metadata
22
+
23
+ return notebook_json if notebook_json.key?('cells') && notebook_json.key?('metadata')
24
+
25
+ raise InvalidNotebookError
26
+ rescue JSON::ParserError
27
+ raise InvalidNotebookError
28
+ end
29
+
30
+ def transform(notebook)
31
+ notebook_json = validate_notebook(notebook)
32
+ transformed_blocks = notebook_json['cells'].map do |cell|
33
+ decorate_cell(transform_cell(cell, notebook_json), cell)
34
+ end
35
+
36
+ transformed_blocks.prepend(transform_metadata(notebook_json)) if @include_metadata
13
37
  transformed_blocks.join("\n")
14
38
  end
15
39
 
16
- def self.transform_cell(cell, notebook)
40
+ def decorate_cell(rows, cell)
41
+ tags = cell['metadata']&.fetch('tags', [])
42
+ type = cell['cell_type'] || 'raw'
43
+
44
+ case @cell_decorator
45
+ when :html
46
+ rows.prepend(%(<div class="cell #{type}" data-id="#{cell['id']}" data-tags="#{tags&.join(' ')}">\n\n))
47
+ .append("\n</div>\n")
48
+ when :percent
49
+ rows.prepend(%(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}\n\n))
50
+ else
51
+ rows
52
+ end.join('')
53
+ end
54
+
55
+ def transform_cell(cell, notebook)
17
56
  cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook) : transform_text_cell(cell)
18
57
  end
19
58
 
20
- def self.transform_code_cell(cell, notebook)
21
- tags = cell['metadata'].fetch('tags', []).join(' ')
59
+ def decorate_output(output_rows, output)
60
+ if @cell_decorator == :html
61
+ output_rows.prepend(%(\n<div class="output #{output['output_type']}">\n\n)).append("\n</div>\n")
62
+ else
63
+ output_rows.prepend(%(\n%%%% Output: #{output['output_type']}\n\n))
64
+ end
65
+ end
22
66
 
67
+ def transform_code_cell(cell, notebook)
23
68
  [
24
- %(<div class="cell code" data-id="#{cell['id']}" data-tags="#{tags}">\n\n),
25
69
  %(``` #{notebook['metadata']['kernelspec']['language']}\n),
26
70
  *cell['source'],
27
71
  "\n```\n",
28
- *cell['outputs'].map { |output| transform_output(output) },
29
- "\n</div>\n"
30
- ].join('')
72
+ *cell['outputs'].map { |output| transform_output(output) }
73
+ ]
31
74
  end
32
75
 
33
- def self.format_traceback(traceback)
76
+ def format_traceback(traceback)
34
77
  traceback.map do |t|
35
78
  t.split("\n").map do |line|
36
- line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip
79
+ line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip << "\n"
37
80
  end
38
- end.join("\n")
81
+ end
39
82
  end
40
83
 
41
- def self.transform_execute_result(output)
42
- [
43
- %(\n<div class="output execute_result">\n\n),
44
- *output['data']['text/plain'].map { |line| " #{line}" },
45
- "\n\n</div>\n"
46
- ].join('')
84
+ def transform_execute_result(output)
85
+ output['data']['text/plain'].map { |line| " #{line}" }.append("\n")
47
86
  end
48
87
 
49
- def self.transform_image_result(output)
88
+ def transform_image_result(output)
50
89
  if output['data'].key?('image/png')
51
- [
52
- %(\n<div class="output display_data">\n\n),
53
- "![](data:image/png;base64,#{output['data']['image/png'].gsub("\n", '')})",
54
- "\n\n</div>\n"
55
- ].join('')
90
+ ["![](data:image/png;base64,#{output['data']['image/png'].gsub("\n", '')})", "\n"]
56
91
  end
57
92
  end
58
93
 
59
- def self.transform_error_result(output)
60
- [
61
- %(\n<div class="output error">\n\n),
62
- format_traceback(output['traceback']),
63
- "\n\n</div>\n"
64
- ].join('')
94
+ def transform_error_result(output)
95
+ format_traceback(output['traceback'])
65
96
  end
66
97
 
67
- def self.transform_output(output)
68
- case output['output_type']
69
- when 'execute_result'
70
- transform_execute_result(output)
71
- when 'display_data'
72
- transform_image_result(output)
73
- when 'error'
74
- transform_error_result(output)
75
- end
76
- end
98
+ def transform_output(output)
99
+ transformed =
100
+ case output['output_type']
101
+ when 'execute_result'
102
+ transform_execute_result(output)
103
+ when 'display_data'
104
+ transform_image_result(output)
105
+ when 'error'
106
+ transform_error_result(output)
107
+ end
77
108
 
78
- def self.transform_text_cell(cell)
79
- tags = cell['metadata'].fetch('tags', []).join(' ')
80
- id = cell['id']
81
- cell_type = cell['cell_type']
109
+ decorate_output(transformed, output).join('') if transformed
110
+ end
82
111
 
83
- [
84
- %(<div class="cell #{cell_type}" data-id="#{id}" data-tags="#{tags}">\n\n),
85
- *cell['source'],
86
- "\n\n</div>\n"
87
- ].join('')
112
+ def transform_text_cell(cell)
113
+ cell['source'].append("\n")
88
114
  end
89
115
 
90
- def self.transform_metadata(notebook_json)
116
+ def transform_metadata(notebook_json)
91
117
  {
92
118
  'jupyter' => {
93
119
  'kernelspec' => notebook_json['metadata']['kernelspec'],
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ipynbdiff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo Bonet
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-13 00:00:00.000000000 Z
11
+ date: 2021-10-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: diffy
@@ -108,7 +108,8 @@ dependencies:
108
108
  - - ">="
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
- description: A simple hello world gem
111
+ description: Better diff for Jupyter Notebooks by first preprocessing them and removing
112
+ clutter
112
113
  email: ebonet@gitlab.com
113
114
  executables: []
114
115
  extensions: []
@@ -143,5 +144,5 @@ requirements: []
143
144
  rubygems_version: 3.1.6
144
145
  signing_key:
145
146
  specification_version: 4
146
- summary: Human Readble diffs for Jupyter Notebooks
147
+ summary: Human Readable diffs for Jupyter Notebooks
147
148
  test_files: []