ipynbdiff 0.3.3 → 0.3.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 80e13b121205b2811ffb5d508e0978284562ced1b359156530b32ec2e10de6d4
4
- data.tar.gz: 9a64f732b7e2ca0143bab893eb8e7713761f533d79798935d137d88d58f401e6
3
+ metadata.gz: d87c49e29a069b5064985b1b1097ac4d02c375161d9fe64df8e70f0155419220
4
+ data.tar.gz: 97931332962139f7f129a546bb7a7f9b670d779dcb032b27fbb1a3faa6f7dc85
5
5
  SHA512:
6
- metadata.gz: 27f12477595add7a90f8f8d3ebf3b46b32ae24ec5c25a9417b03ff52d0be47ca68db501a2feca3337a0e8cf8ccee257cceccb7b9cbbf6716680641fb00a6a7f7
7
- data.tar.gz: b1c3cd5e12a5f949be5e14c1dd2b69b93304b4b507787c91f8d7cc383855bcf52dabf2edbfe4a67e4410e20d575686f71dbfe5d2b7b4853678acf978555c4cd4
6
+ metadata.gz: 86c4be0fc53e0db2172221a17dd07e0c3f37492ffc699075b0de74dea3265feebee2207ef4e9170ead59be15c42b858918fbd2e56c7ed109cb674fbf7eb7dbbb
7
+ data.tar.gz: 10fdb124114a73425edc57f5c9fb80fbd546f6b8e7d58929a327f1be268774d8409238630286aa050e41384bf53c951d2b8c67aaeb76835bc3e0f37672775d48
data/README.md CHANGED
@@ -1,3 +1,57 @@
1
- # rb-ipynbdiff: Better Jupyter Notebook diffs, in Ruby
1
+ # IpynbDiff: Better diff for Jupyter Notebooks
2
2
 
3
- This is a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff)
3
+ This is a simple diff tool that cleans up jupyter notebooks, transforming each [notebook](example/1/from.ipynb)
4
+ into a [readable markdown file](example/1/from_html.md), keeping the output of cells, and running the
5
+ diff after. Markdowns are generated using an opinionated Jupyter to Markdown conversion. This means
6
+ that the entire file is readable on the diff.
7
+
8
+ The resulting diffs are much easier to read:
9
+
10
+ | Diff | IpynbDiff - HTML | IpynbDiff - Percent |
11
+ | ------ | ------ | ------ |
12
+ | [Here](example/diff.txt) | [Here](example/ipynbdiff.txt) | [Here](example/ipynbdiff_percent.txt) |
13
+ | ![](example/img/diff.png) | ![](example/img/ipynbdiff_html.png) | ![](example/img/ipynbdiff_percent.png) |
14
+
15
+
16
+ This started as a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff),
17
+ but now has extended functionality, although it does not work as a git driver.
18
+
19
+ ## Usage
20
+
21
+ ### Generating diffs
22
+
23
+ ```ruby
24
+ IpynbDiff.diff(from_path, to_path, options)
25
+ ```
26
+
27
+ Options:
28
+
29
+ ```ruby
30
+ @default_diff_options = {
31
+ preprocess_input: TRUE, # Whether the input should be transformed
32
+ write_output_to: nil, # Pass a path to save the output to a file
33
+ format: :text, # These are the formats Diffy accepts https://github.com/samg/diffy
34
+ sources_are_files: FALSE, # Whether from/to are paths to files to read (TRUE) or the notebook contents as strings (FALSE)
35
+ transform_options: @default_transform_options, # See below for transform options
36
+ diff_opts: {
37
+ include_diff_info: FALSE # These are passed to Diffy https://github.com/samg/diffy
38
+ }
39
+ }
40
+ ```
41
+
42
+ ### Transforming the notebooks
43
+
44
+ It might be necessary to have the transformed files in addition to the diff.
45
+
46
+ ```ruby
47
+ IpynbDiff.transform(notebook, options)
48
+ ```
49
+
50
+ Options:
51
+
52
+ ```ruby
53
+ @default_transform_options = {
54
+ include_metadata: FALSE, # Whether to include the notebook metadata (kernel, language, etc)
55
+ cell_decorator: :html # :html is useful to add styling with css, :percent is better for text format
56
+ }
57
+ ```
data/ipynbdiff.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'ipynbdiff'
5
- s.version = '0.3.3'
5
+ s.version = '0.3.4'
6
6
  s.summary = 'Human Readable diffs for Jupyter Notebooks'
7
7
  s.description = 'Better diff for Jupyter Notebooks by first preprocessing them and removing clutter'
8
8
  s.authors = ['Eduardo Bonet']
data/lib/ipynbdiff.rb CHANGED
@@ -2,15 +2,20 @@
2
2
 
3
3
  # Human Readable Jupyter Diffs
4
4
  module IpynbDiff
5
- require 'transformer.rb'
5
+ require 'transformer'
6
6
  require 'diffy'
7
7
 
8
- @default_options = {
8
+ @default_transform_options = {
9
+ include_metadata: FALSE,
10
+ cell_decorator: :html
11
+ }
12
+
13
+ @default_diff_options = {
9
14
  preprocess_input: TRUE,
10
15
  write_output_to: nil,
11
16
  format: :text,
12
17
  sources_are_files: FALSE,
13
- include_metadata: TRUE,
18
+ transform_options: @default_transform_options,
14
19
  diff_opts: {
15
20
  include_diff_info: FALSE
16
21
  }
@@ -19,7 +24,7 @@ module IpynbDiff
19
24
  def self.prepare_input(to_prepare, options)
20
25
  prepared = to_prepare
21
26
  prepared = File.read(prepared) if options[:sources_are_files]
22
- prepared = Transformer.transform(prepared, include_metadata: options[:include_metadata]) if options[:preprocess_input]
27
+ prepared = transform(prepared, options[:transform_options]) if options[:preprocess_input]
23
28
 
24
29
  prepared
25
30
  end
@@ -29,7 +34,7 @@ module IpynbDiff
29
34
  to_notebook,
30
35
  options = {}
31
36
  )
32
- options = @default_options.merge(options)
37
+ options = @default_diff_options.merge(options)
33
38
 
34
39
  from = from_notebook && prepare_input(from_notebook, options) || ''
35
40
  to = to_notebook && prepare_input(to_notebook, options) || ''
@@ -40,4 +45,9 @@ module IpynbDiff
40
45
 
41
46
  d
42
47
  end
48
+
49
+ def self.transform(notebook, options)
50
+ options = @default_transform_options.merge(options)
51
+ Transformer.new(**options).transform(notebook)
52
+ end
43
53
  end
data/lib/transformer.rb CHANGED
@@ -6,88 +6,101 @@ module IpynbDiff
6
6
  require 'json'
7
7
  require 'yaml'
8
8
 
9
- def self.transform(notebook, include_metadata: TRUE)
9
+ @cell_decorator = :html
10
+ @include_metadata = TRUE
11
+
12
+ def initialize(include_metadata: TRUE, cell_decorator: :html)
13
+ @include_metadata = include_metadata
14
+ @cell_decorator = cell_decorator
15
+ end
16
+
17
+ def transform(notebook)
10
18
  notebook_json = JSON.parse(notebook)
11
- transformed_blocks = notebook_json['cells'].map { |cell| transform_cell(cell, notebook_json) }
12
- transformed_blocks.prepend(transform_metadata(notebook_json)) if include_metadata
19
+ transformed_blocks = notebook_json['cells'].map do |cell|
20
+ decorate_cell(transform_cell(cell, notebook_json), cell)
21
+ end
22
+
23
+ transformed_blocks.prepend(transform_metadata(notebook_json)) if @include_metadata
13
24
  transformed_blocks.join("\n")
14
25
  end
15
26
 
16
- def self.transform_cell(cell, notebook)
27
+ def decorate_cell(rows, cell)
28
+ tags = cell['metadata']&.fetch('tags', [])
29
+ type = cell['cell_type']
30
+
31
+ case @cell_decorator
32
+ when :html
33
+ rows.prepend(%(<div class="cell #{type}" data-id="#{cell['id']}" data-tags="#{tags&.join(' ')}">\n\n))
34
+ .append("\n</div>\n")
35
+ when :percent
36
+ rows.prepend(%(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}\n\n))
37
+ else
38
+ rows
39
+ end.join('')
40
+ end
41
+
42
+ def transform_cell(cell, notebook)
17
43
  cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook) : transform_text_cell(cell)
18
44
  end
19
45
 
20
- def self.transform_code_cell(cell, notebook)
21
- tags = cell['metadata']&.fetch('tags', [])&.join(' ') || ''
46
+ def decorate_output(output_rows, output)
47
+ if @cell_decorator == :html
48
+ output_rows.prepend(%(\n<div class="output #{output['output_type']}">\n\n)).append("\n</div>\n")
49
+ else
50
+ output_rows.prepend(%(\n%%%% Output: #{output['output_type']}\n\n))
51
+ end
52
+ end
22
53
 
54
+ def transform_code_cell(cell, notebook)
23
55
  [
24
- %(<div class="cell code" data-id="#{cell['id']}" data-tags="#{tags}">\n\n),
25
56
  %(``` #{notebook['metadata']['kernelspec']['language']}\n),
26
57
  *cell['source'],
27
58
  "\n```\n",
28
- *cell['outputs'].map { |output| transform_output(output) },
29
- "\n</div>\n"
30
- ].join('')
59
+ *cell['outputs'].map { |output| transform_output(output) }
60
+ ]
31
61
  end
32
62
 
33
- def self.format_traceback(traceback)
63
+ def format_traceback(traceback)
34
64
  traceback.map do |t|
35
65
  t.split("\n").map do |line|
36
- line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip
66
+ line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip << "\n"
37
67
  end
38
- end.join("\n")
68
+ end
39
69
  end
40
70
 
41
- def self.transform_execute_result(output)
42
- [
43
- %(\n<div class="output execute_result">\n\n),
44
- *output['data']['text/plain'].map { |line| " #{line}" },
45
- "\n\n</div>\n"
46
- ].join('')
71
+ def transform_execute_result(output)
72
+ output['data']['text/plain'].map { |line| " #{line}" }.append("\n")
47
73
  end
48
74
 
49
- def self.transform_image_result(output)
75
+ def transform_image_result(output)
50
76
  if output['data'].key?('image/png')
51
- [
52
- %(\n<div class="output display_data">\n\n),
53
- "![](data:image/png;base64,#{output['data']['image/png'].gsub("\n", '')})",
54
- "\n\n</div>\n"
55
- ].join('')
77
+ ["![](data:image/png;base64,#{output['data']['image/png'].gsub("\n", '')})", "\n"]
56
78
  end
57
79
  end
58
80
 
59
- def self.transform_error_result(output)
60
- [
61
- %(\n<div class="output error">\n\n),
62
- format_traceback(output['traceback']),
63
- "\n\n</div>\n"
64
- ].join('')
81
+ def transform_error_result(output)
82
+ format_traceback(output['traceback'])
65
83
  end
66
84
 
67
- def self.transform_output(output)
68
- case output['output_type']
69
- when 'execute_result'
70
- transform_execute_result(output)
71
- when 'display_data'
72
- transform_image_result(output)
73
- when 'error'
74
- transform_error_result(output)
75
- end
76
- end
85
+ def transform_output(output)
86
+ transformed =
87
+ case output['output_type']
88
+ when 'execute_result'
89
+ transform_execute_result(output)
90
+ when 'display_data'
91
+ transform_image_result(output)
92
+ when 'error'
93
+ transform_error_result(output)
94
+ end
77
95
 
78
- def self.transform_text_cell(cell)
79
- tags = cell['metadata']&.fetch('tags', [])&.join(' ') || ''
80
- id = cell['id']
81
- cell_type = cell['cell_type']
96
+ decorate_output(transformed, output).join('') if transformed
97
+ end
82
98
 
83
- [
84
- %(<div class="cell #{cell_type}" data-id="#{id}" data-tags="#{tags}">\n\n),
85
- *cell['source'],
86
- "\n\n</div>\n"
87
- ].join('')
99
+ def transform_text_cell(cell)
100
+ cell['source'].append("\n")
88
101
  end
89
102
 
90
- def self.transform_metadata(notebook_json)
103
+ def transform_metadata(notebook_json)
91
104
  {
92
105
  'jupyter' => {
93
106
  'kernelspec' => notebook_json['metadata']['kernelspec'],
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ipynbdiff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo Bonet
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-13 00:00:00.000000000 Z
11
+ date: 2021-10-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: diffy