ipynbdiff 0.3.1 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +57 -2
- data/ipynbdiff.gemspec +3 -3
- data/lib/ipynbdiff.rb +29 -13
- data/lib/transformer.rb +77 -51
- metadata +5 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 54c011ed4790dc548b33d6e4773bb9bbdaedd7b8c1ffbdbcfa5e494e5380b1a9
|
|
4
|
+
data.tar.gz: 70f78c722ddc432576ec52d55617e4ce5bdf29cb7ff32ad46219ac52d0d6f5d5
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 757f9ea284f358771835db2e27ebe90cb5c8c99336313a65d04cbd0080c0891e1c25af79740cb4d191eeafec10e66c74baceef1ba24729b6ba6f743d502564fa
|
|
7
|
+
data.tar.gz: 35866e056ae8f4ab043bd9cc247f6c3f8dc3b02d475e94221ca11352badf1e96cf840e8020553ffa8c32b1359f967c74ccc67e0f4abda376e8a035d027871daf
|
data/README.md
CHANGED
|
@@ -1,3 +1,58 @@
|
|
|
1
|
-
#
|
|
1
|
+
# IpynbDiff: Better diff for Jupyter Notebooks
|
|
2
2
|
|
|
3
|
-
This is a
|
|
3
|
+
This is a simple diff tool that cleans up jupyter notebooks, transforming each [notebook](example/1/from.ipynb)
|
|
4
|
+
into a [readable markdown file](example/1/from_html.md), keeping the output of cells, and running the
|
|
5
|
+
diff after. Markdowns are generated using an opinionated Jupyter to Markdown conversion. This means
|
|
6
|
+
that the entire file is readable on the diff.
|
|
7
|
+
|
|
8
|
+
The results are diffs that are much easier to read:
|
|
9
|
+
|
|
10
|
+
| Diff | IpynbDiff - HTML | IpynbDiff - Percent |
|
|
11
|
+
| ------ | ------ | ------ |
|
|
12
|
+
| [Here](example/diff.txt) | [Here](example/ipynbdiff.txt) | [Here](example/ipynbdiff_percent.txt) |
|
|
13
|
+
|  |  |  |
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
This started as a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff),
|
|
17
|
+
but now has extended functionality although not working as git driver.
|
|
18
|
+
|
|
19
|
+
## Usage
|
|
20
|
+
|
|
21
|
+
### Generating diffs
|
|
22
|
+
|
|
23
|
+
```ruby
|
|
24
|
+
IpynbDiff.diff(from_path, to_path, options)
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Options:
|
|
28
|
+
|
|
29
|
+
```ruby
|
|
30
|
+
@default_transform_options = {
|
|
31
|
+
preprocess_input: TRUE, # Whether the input should be transformed
|
|
32
|
+
write_output_to: nil, # Pass a path to save the output to a file
|
|
33
|
+
format: :text, # These are the formats Diffy accepts https://github.com/samg/diffy
|
|
34
|
+
sources_are_files: FALSE, # Whether to use the from/to as string or path to a file
|
|
35
|
+
raise_if_invalid_notebook: FALSE, # Raises an error if the notebooks are invalid, otherwise returns nil
|
|
36
|
+
transform_options: @default_transform_options, # See below for transform options
|
|
37
|
+
diff_opts: {
|
|
38
|
+
include_diff_info: FALSE # These are passed to Diffy https://github.com/samg/diffy
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### Transforming the notebooks
|
|
44
|
+
|
|
45
|
+
It might be necessary to have the transformed files in addition to the diff.
|
|
46
|
+
|
|
47
|
+
```ruby
|
|
48
|
+
IpynbDiff.transform(notebook, options)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Options:
|
|
52
|
+
|
|
53
|
+
```ruby
|
|
54
|
+
@default_transform_options = {
|
|
55
|
+
include_metadata: FALSE, # Whether to include or not the notebook metadata (kernel, language, etc)
|
|
56
|
+
cell_decorator: :html # :html is useful to add styling with css, :percent is better for text format
|
|
57
|
+
}
|
|
58
|
+
```
|
data/ipynbdiff.gemspec
CHANGED
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
Gem::Specification.new do |s|
|
|
4
4
|
s.name = 'ipynbdiff'
|
|
5
|
-
s.version = '0.3.
|
|
6
|
-
s.summary = 'Human
|
|
7
|
-
s.description = '
|
|
5
|
+
s.version = '0.3.5'
|
|
6
|
+
s.summary = 'Human Readable diffs for Jupyter Notebooks'
|
|
7
|
+
s.description = 'Better diff for Jupyter Notebooks by first preprocessing them and removing clutter'
|
|
8
8
|
s.authors = ['Eduardo Bonet']
|
|
9
9
|
s.email = 'ebonet@gitlab.com'
|
|
10
10
|
# Specify which files should be added to the gem when it is released.
|
data/lib/ipynbdiff.rb
CHANGED
|
@@ -2,41 +2,57 @@
|
|
|
2
2
|
|
|
3
3
|
# Human Readable Jupyter Diffs
|
|
4
4
|
module IpynbDiff
|
|
5
|
-
require 'transformer
|
|
5
|
+
require 'transformer'
|
|
6
6
|
require 'diffy'
|
|
7
7
|
|
|
8
|
-
@
|
|
8
|
+
@default_transform_options = {
|
|
9
|
+
include_metadata: FALSE,
|
|
10
|
+
cell_decorator: :html
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
@default_diff_options = {
|
|
9
14
|
preprocess_input: TRUE,
|
|
10
15
|
write_output_to: nil,
|
|
11
16
|
format: :text,
|
|
12
17
|
sources_are_files: FALSE,
|
|
18
|
+
raise_if_invalid_notebook: FALSE,
|
|
19
|
+
transform_options: @default_transform_options,
|
|
13
20
|
diff_opts: {
|
|
14
21
|
include_diff_info: FALSE
|
|
15
22
|
}
|
|
16
23
|
}.freeze
|
|
17
24
|
|
|
18
|
-
def self.prepare_input(to_prepare,
|
|
19
|
-
|
|
20
|
-
prepared = File.read(prepared) if load_from_file
|
|
21
|
-
prepared = Transformer.transform(prepared) if preprocess
|
|
25
|
+
def self.prepare_input(to_prepare, options)
|
|
26
|
+
return '' unless to_prepare
|
|
22
27
|
|
|
23
|
-
|
|
28
|
+
prep = to_prepare
|
|
29
|
+
prep = File.read(prep) if options[:sources_are_files]
|
|
30
|
+
prep = transform(prep, raise_errors: TRUE, options: options[:transform_options]) if options[:preprocess_input]
|
|
31
|
+
prep
|
|
24
32
|
end
|
|
25
33
|
|
|
26
34
|
def self.diff(
|
|
27
35
|
from_notebook,
|
|
28
36
|
to_notebook,
|
|
29
|
-
options =
|
|
37
|
+
options = @default_diff_options
|
|
30
38
|
)
|
|
31
|
-
options = @
|
|
39
|
+
options = @default_diff_options.merge(options)
|
|
32
40
|
|
|
33
|
-
from = prepare_input(from_notebook, options
|
|
34
|
-
to = prepare_input(to_notebook, options
|
|
41
|
+
from = prepare_input(from_notebook, options)
|
|
42
|
+
to = prepare_input(to_notebook, options)
|
|
35
43
|
|
|
36
44
|
d = Diffy::Diff.new(from, to, **options[:diff_opts]).to_s(options[:format])
|
|
37
|
-
|
|
38
45
|
File.write(options[:write_output_to], d) if options[:write_output_to]
|
|
39
|
-
|
|
40
46
|
d
|
|
47
|
+
rescue InvalidNotebookError
|
|
48
|
+
raise if options[:raise_if_invalid_notebook]
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def self.transform(notebook, raise_errors: FALSE, options: @default_transform_options)
|
|
52
|
+
options = @default_transform_options.merge(options)
|
|
53
|
+
|
|
54
|
+
Transformer.new(**options).transform(notebook)
|
|
55
|
+
rescue InvalidNotebookError
|
|
56
|
+
raise if raise_errors
|
|
41
57
|
end
|
|
42
58
|
end
|
data/lib/transformer.rb
CHANGED
|
@@ -1,93 +1,119 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module IpynbDiff
|
|
4
|
+
class InvalidNotebookError < StandardError
|
|
5
|
+
end
|
|
6
|
+
|
|
4
7
|
# Returns a markdown version of the Jupyter Notebook
|
|
5
8
|
class Transformer
|
|
6
9
|
require 'json'
|
|
7
10
|
require 'yaml'
|
|
8
11
|
|
|
9
|
-
|
|
12
|
+
@cell_decorator = :html
|
|
13
|
+
@include_metadata = TRUE
|
|
14
|
+
|
|
15
|
+
def initialize(include_metadata: TRUE, cell_decorator: :html)
|
|
16
|
+
@include_metadata = include_metadata
|
|
17
|
+
@cell_decorator = cell_decorator
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def validate_notebook(notebook)
|
|
10
21
|
notebook_json = JSON.parse(notebook)
|
|
11
|
-
|
|
12
|
-
|
|
22
|
+
|
|
23
|
+
return notebook_json if notebook_json.key?('cells') && notebook_json.key?('metadata')
|
|
24
|
+
|
|
25
|
+
raise InvalidNotebookError
|
|
26
|
+
rescue JSON::ParserError
|
|
27
|
+
raise InvalidNotebookError
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
def transform(notebook)
|
|
31
|
+
notebook_json = validate_notebook(notebook)
|
|
32
|
+
transformed_blocks = notebook_json['cells'].map do |cell|
|
|
33
|
+
decorate_cell(transform_cell(cell, notebook_json), cell)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
transformed_blocks.prepend(transform_metadata(notebook_json)) if @include_metadata
|
|
13
37
|
transformed_blocks.join("\n")
|
|
14
38
|
end
|
|
15
39
|
|
|
16
|
-
def
|
|
40
|
+
def decorate_cell(rows, cell)
|
|
41
|
+
tags = cell['metadata']&.fetch('tags', [])
|
|
42
|
+
type = cell['cell_type'] || 'raw'
|
|
43
|
+
|
|
44
|
+
case @cell_decorator
|
|
45
|
+
when :html
|
|
46
|
+
rows.prepend(%(<div class="cell #{type}" data-id="#{cell['id']}" data-tags="#{tags&.join(' ')}">\n\n))
|
|
47
|
+
.append("\n</div>\n")
|
|
48
|
+
when :percent
|
|
49
|
+
rows.prepend(%(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}\n\n))
|
|
50
|
+
else
|
|
51
|
+
rows
|
|
52
|
+
end.join('')
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def transform_cell(cell, notebook)
|
|
17
56
|
cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook) : transform_text_cell(cell)
|
|
18
57
|
end
|
|
19
58
|
|
|
20
|
-
def
|
|
21
|
-
|
|
59
|
+
def decorate_output(output_rows, output)
|
|
60
|
+
if @cell_decorator == :html
|
|
61
|
+
output_rows.prepend(%(\n<div class="output #{output['output_type']}">\n\n)).append("\n</div>\n")
|
|
62
|
+
else
|
|
63
|
+
output_rows.prepend(%(\n%%%% Output: #{output['output_type']}\n\n))
|
|
64
|
+
end
|
|
65
|
+
end
|
|
22
66
|
|
|
67
|
+
def transform_code_cell(cell, notebook)
|
|
23
68
|
[
|
|
24
|
-
%(<div class="cell code" data-id="#{cell['id']}" data-tags="#{tags}">\n\n),
|
|
25
69
|
%(``` #{notebook['metadata']['kernelspec']['language']}\n),
|
|
26
70
|
*cell['source'],
|
|
27
71
|
"\n```\n",
|
|
28
|
-
*cell['outputs'].map { |output| transform_output(output) }
|
|
29
|
-
|
|
30
|
-
].join('')
|
|
72
|
+
*cell['outputs'].map { |output| transform_output(output) }
|
|
73
|
+
]
|
|
31
74
|
end
|
|
32
75
|
|
|
33
|
-
def
|
|
76
|
+
def format_traceback(traceback)
|
|
34
77
|
traceback.map do |t|
|
|
35
78
|
t.split("\n").map do |line|
|
|
36
|
-
line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip
|
|
79
|
+
line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip << "\n"
|
|
37
80
|
end
|
|
38
|
-
end
|
|
81
|
+
end
|
|
39
82
|
end
|
|
40
83
|
|
|
41
|
-
def
|
|
42
|
-
[
|
|
43
|
-
%(\n<div class="output execute_result">\n\n),
|
|
44
|
-
*output['data']['text/plain'].map { |line| " #{line}" },
|
|
45
|
-
"\n\n</div>\n"
|
|
46
|
-
].join('')
|
|
84
|
+
def transform_execute_result(output)
|
|
85
|
+
output['data']['text/plain'].map { |line| " #{line}" }.append("\n")
|
|
47
86
|
end
|
|
48
87
|
|
|
49
|
-
def
|
|
88
|
+
def transform_image_result(output)
|
|
50
89
|
if output['data'].key?('image/png')
|
|
51
|
-
[
|
|
52
|
-
%(\n<div class="output display_data">\n\n),
|
|
53
|
-
"})",
|
|
54
|
-
"\n\n</div>\n"
|
|
55
|
-
].join('')
|
|
90
|
+
["})", "\n"]
|
|
56
91
|
end
|
|
57
92
|
end
|
|
58
93
|
|
|
59
|
-
def
|
|
60
|
-
[
|
|
61
|
-
%(\n<div class="output error">\n\n),
|
|
62
|
-
format_traceback(output['traceback']),
|
|
63
|
-
"\n\n</div>\n"
|
|
64
|
-
].join('')
|
|
94
|
+
def transform_error_result(output)
|
|
95
|
+
format_traceback(output['traceback'])
|
|
65
96
|
end
|
|
66
97
|
|
|
67
|
-
def
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
98
|
+
def transform_output(output)
|
|
99
|
+
transformed =
|
|
100
|
+
case output['output_type']
|
|
101
|
+
when 'execute_result'
|
|
102
|
+
transform_execute_result(output)
|
|
103
|
+
when 'display_data'
|
|
104
|
+
transform_image_result(output)
|
|
105
|
+
when 'error'
|
|
106
|
+
transform_error_result(output)
|
|
107
|
+
end
|
|
77
108
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
id = cell['id']
|
|
81
|
-
cell_type = cell['cell_type']
|
|
109
|
+
decorate_output(transformed, output).join('') if transformed
|
|
110
|
+
end
|
|
82
111
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
*cell['source'],
|
|
86
|
-
"\n\n</div>\n"
|
|
87
|
-
].join('')
|
|
112
|
+
def transform_text_cell(cell)
|
|
113
|
+
cell['source'].append("\n")
|
|
88
114
|
end
|
|
89
115
|
|
|
90
|
-
def
|
|
116
|
+
def transform_metadata(notebook_json)
|
|
91
117
|
{
|
|
92
118
|
'jupyter' => {
|
|
93
119
|
'kernelspec' => notebook_json['metadata']['kernelspec'],
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ipynbdiff
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.
|
|
4
|
+
version: 0.3.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Eduardo Bonet
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2021-10-
|
|
11
|
+
date: 2021-10-19 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: diffy
|
|
@@ -108,7 +108,8 @@ dependencies:
|
|
|
108
108
|
- - ">="
|
|
109
109
|
- !ruby/object:Gem::Version
|
|
110
110
|
version: '0'
|
|
111
|
-
description:
|
|
111
|
+
description: Better diff for Jupyter Notebooks by first preprocessing them and removing
|
|
112
|
+
clutter
|
|
112
113
|
email: ebonet@gitlab.com
|
|
113
114
|
executables: []
|
|
114
115
|
extensions: []
|
|
@@ -143,5 +144,5 @@ requirements: []
|
|
|
143
144
|
rubygems_version: 3.1.6
|
|
144
145
|
signing_key:
|
|
145
146
|
specification_version: 4
|
|
146
|
-
summary: Human
|
|
147
|
+
summary: Human Readable diffs for Jupyter Notebooks
|
|
147
148
|
test_files: []
|