ipynbdiff 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 80e13b121205b2811ffb5d508e0978284562ced1b359156530b32ec2e10de6d4
4
- data.tar.gz: 9a64f732b7e2ca0143bab893eb8e7713761f533d79798935d137d88d58f401e6
3
+ metadata.gz: d87c49e29a069b5064985b1b1097ac4d02c375161d9fe64df8e70f0155419220
4
+ data.tar.gz: 97931332962139f7f129a546bb7a7f9b670d779dcb032b27fbb1a3faa6f7dc85
5
5
  SHA512:
6
- metadata.gz: 27f12477595add7a90f8f8d3ebf3b46b32ae24ec5c25a9417b03ff52d0be47ca68db501a2feca3337a0e8cf8ccee257cceccb7b9cbbf6716680641fb00a6a7f7
7
- data.tar.gz: b1c3cd5e12a5f949be5e14c1dd2b69b93304b4b507787c91f8d7cc383855bcf52dabf2edbfe4a67e4410e20d575686f71dbfe5d2b7b4853678acf978555c4cd4
6
+ metadata.gz: 86c4be0fc53e0db2172221a17dd07e0c3f37492ffc699075b0de74dea3265feebee2207ef4e9170ead59be15c42b858918fbd2e56c7ed109cb674fbf7eb7dbbb
7
+ data.tar.gz: 10fdb124114a73425edc57f5c9fb80fbd546f6b8e7d58929a327f1be268774d8409238630286aa050e41384bf53c951d2b8c67aaeb76835bc3e0f37672775d48
data/README.md CHANGED
@@ -1,3 +1,57 @@
1
- # rb-ipynbdiff: Better Jupyter Notebook diffs, in Ruby
1
+ # IpynbDiff: Better diff for Jupyter Notebooks
2
2
 
3
- This is a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff)
3
+ This is a simple diff tool that cleans up jupyter notebooks, transforming each [notebook](example/1/from.ipynb)
4
+ into a [readable markdown file](example/1/from_html.md), keeping the output of cells, and running the
5
+ diff after. Markdowns are generated using an opinionated Jupyter to Markdown conversion. This means
6
+ that the entire file is readable on the diff.
7
+
8
+ The resulting diffs are much easier to read:
9
+
10
+ | Diff | IpynbDiff - HTML | IpynbDiff - Percent |
11
+ | ------ | ------ | ------ |
12
+ | [Here](example/diff.txt) | [Here](example/ipynbdiff.txt) | [Here](example/ipynbdiff_percent.txt) |
13
+ | ![](example/img/diff.png) | ![](example/img/ipynbdiff_html.png) | ![](example/img/ipynbdiff_percent.png) |
14
+
15
+
16
+ This started as a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff),
17
+ but now has extended functionality, although it does not work as a git driver.
18
+
19
+ ## Usage
20
+
21
+ ### Generating diffs
22
+
23
+ ```ruby
24
+ IpynbDiff.diff(from_path, to_path, options)
25
+ ```
26
+
27
+ Options:
28
+
29
+ ```ruby
30
+ @default_diff_options = {
31
+ preprocess_input: TRUE, # Whether the input should be transformed
32
+ write_output_to: nil, # Pass a path to save the output to a file
33
+ format: :text, # These are the formats Diffy accepts https://github.com/samg/diffy
34
+ sources_are_files: FALSE, # Whether to use the from/to as string or path to a file
35
+ transform_options: @default_transform_options, # See below for transform options
36
+ diff_opts: {
37
+ include_diff_info: FALSE # These are passed to Diffy https://github.com/samg/diffy
38
+ }
39
+ }
40
+ ```
41
+
42
+ ### Transforming the notebooks
43
+
44
+ It might be necessary to have the transformed files in addition to the diff.
45
+
46
+ ```ruby
47
+ IpynbDiff.transform(notebook, options)
48
+ ```
49
+
50
+ Options:
51
+
52
+ ```ruby
53
+ @default_transform_options = {
54
+ include_metadata: FALSE, # Whether to include the notebook metadata (kernel, language, etc)
55
+ cell_decorator: :html # :html is useful to add styling with css, :percent is better for text format
56
+ }
57
+ ```
data/ipynbdiff.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'ipynbdiff'
5
- s.version = '0.3.3'
5
+ s.version = '0.3.4'
6
6
  s.summary = 'Human Readable diffs for Jupyter Notebooks'
7
7
  s.description = 'Better diff for Jupyter Notebooks by first preprocessing them and removing clutter'
8
8
  s.authors = ['Eduardo Bonet']
data/lib/ipynbdiff.rb CHANGED
@@ -2,15 +2,20 @@
2
2
 
3
3
  # Human Readable Jupyter Diffs
4
4
  module IpynbDiff
5
- require 'transformer.rb'
5
+ require 'transformer'
6
6
  require 'diffy'
7
7
 
8
- @default_options = {
8
+ @default_transform_options = {
9
+ include_metadata: FALSE,
10
+ cell_decorator: :html
11
+ }
12
+
13
+ @default_diff_options = {
9
14
  preprocess_input: TRUE,
10
15
  write_output_to: nil,
11
16
  format: :text,
12
17
  sources_are_files: FALSE,
13
- include_metadata: TRUE,
18
+ transform_options: @default_transform_options,
14
19
  diff_opts: {
15
20
  include_diff_info: FALSE
16
21
  }
@@ -19,7 +24,7 @@ module IpynbDiff
19
24
  def self.prepare_input(to_prepare, options)
20
25
  prepared = to_prepare
21
26
  prepared = File.read(prepared) if options[:sources_are_files]
22
- prepared = Transformer.transform(prepared, include_metadata: options[:include_metadata]) if options[:preprocess_input]
27
+ prepared = transform(prepared, options[:transform_options]) if options[:preprocess_input]
23
28
 
24
29
  prepared
25
30
  end
@@ -29,7 +34,7 @@ module IpynbDiff
29
34
  to_notebook,
30
35
  options = {}
31
36
  )
32
- options = @default_options.merge(options)
37
+ options = @default_diff_options.merge(options)
33
38
 
34
39
  from = from_notebook && prepare_input(from_notebook, options) || ''
35
40
  to = to_notebook && prepare_input(to_notebook, options) || ''
@@ -40,4 +45,9 @@ module IpynbDiff
40
45
 
41
46
  d
42
47
  end
48
+
49
+ def self.transform(notebook, options)
50
+ options = @default_transform_options.merge(options)
51
+ Transformer.new(**options).transform(notebook)
52
+ end
43
53
  end
data/lib/transformer.rb CHANGED
@@ -6,88 +6,101 @@ module IpynbDiff
6
6
  require 'json'
7
7
  require 'yaml'
8
8
 
9
- def self.transform(notebook, include_metadata: TRUE)
9
+ @cell_decorator = :html
10
+ @include_metadata = TRUE
11
+
12
+ def initialize(include_metadata: TRUE, cell_decorator: :html)
13
+ @include_metadata = include_metadata
14
+ @cell_decorator = cell_decorator
15
+ end
16
+
17
+ def transform(notebook)
10
18
  notebook_json = JSON.parse(notebook)
11
- transformed_blocks = notebook_json['cells'].map { |cell| transform_cell(cell, notebook_json) }
12
- transformed_blocks.prepend(transform_metadata(notebook_json)) if include_metadata
19
+ transformed_blocks = notebook_json['cells'].map do |cell|
20
+ decorate_cell(transform_cell(cell, notebook_json), cell)
21
+ end
22
+
23
+ transformed_blocks.prepend(transform_metadata(notebook_json)) if @include_metadata
13
24
  transformed_blocks.join("\n")
14
25
  end
15
26
 
16
- def self.transform_cell(cell, notebook)
27
+ def decorate_cell(rows, cell)
28
+ tags = cell['metadata']&.fetch('tags', [])
29
+ type = cell['cell_type']
30
+
31
+ case @cell_decorator
32
+ when :html
33
+ rows.prepend(%(<div class="cell #{type}" data-id="#{cell['id']}" data-tags="#{tags&.join(' ')}">\n\n))
34
+ .append("\n</div>\n")
35
+ when :percent
36
+ rows.prepend(%(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}\n\n))
37
+ else
38
+ rows
39
+ end.join('')
40
+ end
41
+
42
+ def transform_cell(cell, notebook)
17
43
  cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook) : transform_text_cell(cell)
18
44
  end
19
45
 
20
- def self.transform_code_cell(cell, notebook)
21
- tags = cell['metadata']&.fetch('tags', [])&.join(' ') || ''
46
+ def decorate_output(output_rows, output)
47
+ if @cell_decorator == :html
48
+ output_rows.prepend(%(\n<div class="output #{output['output_type']}">\n\n)).append("\n</div>\n")
49
+ else
50
+ output_rows.prepend(%(\n%%%% Output: #{output['output_type']}\n\n))
51
+ end
52
+ end
22
53
 
54
+ def transform_code_cell(cell, notebook)
23
55
  [
24
- %(<div class="cell code" data-id="#{cell['id']}" data-tags="#{tags}">\n\n),
25
56
  %(``` #{notebook['metadata']['kernelspec']['language']}\n),
26
57
  *cell['source'],
27
58
  "\n```\n",
28
- *cell['outputs'].map { |output| transform_output(output) },
29
- "\n</div>\n"
30
- ].join('')
59
+ *cell['outputs'].map { |output| transform_output(output) }
60
+ ]
31
61
  end
32
62
 
33
- def self.format_traceback(traceback)
63
+ def format_traceback(traceback)
34
64
  traceback.map do |t|
35
65
  t.split("\n").map do |line|
36
- line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip
66
+ line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip << "\n"
37
67
  end
38
- end.join("\n")
68
+ end
39
69
  end
40
70
 
41
- def self.transform_execute_result(output)
42
- [
43
- %(\n<div class="output execute_result">\n\n),
44
- *output['data']['text/plain'].map { |line| " #{line}" },
45
- "\n\n</div>\n"
46
- ].join('')
71
+ def transform_execute_result(output)
72
+ output['data']['text/plain'].map { |line| " #{line}" }.append("\n")
47
73
  end
48
74
 
49
- def self.transform_image_result(output)
75
+ def transform_image_result(output)
50
76
  if output['data'].key?('image/png')
51
- [
52
- %(\n<div class="output display_data">\n\n),
53
- "![](data:image/png;base64,#{output['data']['image/png'].gsub("\n", '')})",
54
- "\n\n</div>\n"
55
- ].join('')
77
+ ["![](data:image/png;base64,#{output['data']['image/png'].gsub("\n", '')})", "\n"]
56
78
  end
57
79
  end
58
80
 
59
- def self.transform_error_result(output)
60
- [
61
- %(\n<div class="output error">\n\n),
62
- format_traceback(output['traceback']),
63
- "\n\n</div>\n"
64
- ].join('')
81
+ def transform_error_result(output)
82
+ format_traceback(output['traceback'])
65
83
  end
66
84
 
67
- def self.transform_output(output)
68
- case output['output_type']
69
- when 'execute_result'
70
- transform_execute_result(output)
71
- when 'display_data'
72
- transform_image_result(output)
73
- when 'error'
74
- transform_error_result(output)
75
- end
76
- end
85
+ def transform_output(output)
86
+ transformed =
87
+ case output['output_type']
88
+ when 'execute_result'
89
+ transform_execute_result(output)
90
+ when 'display_data'
91
+ transform_image_result(output)
92
+ when 'error'
93
+ transform_error_result(output)
94
+ end
77
95
 
78
- def self.transform_text_cell(cell)
79
- tags = cell['metadata']&.fetch('tags', [])&.join(' ') || ''
80
- id = cell['id']
81
- cell_type = cell['cell_type']
96
+ decorate_output(transformed, output).join('') if transformed
97
+ end
82
98
 
83
- [
84
- %(<div class="cell #{cell_type}" data-id="#{id}" data-tags="#{tags}">\n\n),
85
- *cell['source'],
86
- "\n\n</div>\n"
87
- ].join('')
99
+ def transform_text_cell(cell)
100
+ cell['source'].append("\n")
88
101
  end
89
102
 
90
- def self.transform_metadata(notebook_json)
103
+ def transform_metadata(notebook_json)
91
104
  {
92
105
  'jupyter' => {
93
106
  'kernelspec' => notebook_json['metadata']['kernelspec'],
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ipynbdiff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo Bonet
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-13 00:00:00.000000000 Z
11
+ date: 2021-10-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: diffy