ipynbdiff 0.3.3 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +56 -2
- data/ipynbdiff.gemspec +1 -1
- data/lib/ipynbdiff.rb +15 -5
- data/lib/transformer.rb +64 -51
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d87c49e29a069b5064985b1b1097ac4d02c375161d9fe64df8e70f0155419220
|
4
|
+
data.tar.gz: 97931332962139f7f129a546bb7a7f9b670d779dcb032b27fbb1a3faa6f7dc85
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 86c4be0fc53e0db2172221a17dd07e0c3f37492ffc699075b0de74dea3265feebee2207ef4e9170ead59be15c42b858918fbd2e56c7ed109cb674fbf7eb7dbbb
|
7
|
+
data.tar.gz: 10fdb124114a73425edc57f5c9fb80fbd546f6b8e7d58929a327f1be268774d8409238630286aa050e41384bf53c951d2b8c67aaeb76835bc3e0f37672775d48
|
data/README.md
CHANGED
@@ -1,3 +1,57 @@
|
|
1
|
-
#
|
1
|
+
# IpynbDiff: Better diff for Jupyter Notebooks
|
2
2
|
|
3
|
-
This is a
|
3
|
+
This is a simple diff tool that cleans up jupyter notebooks, transforming each [notebook](example/1/from.ipynb)
|
4
|
+
into a [readable markdown file](example/1/from_html.md), keeping the output of cells, and running the
|
5
|
+
diff after. Markdowns are generated using an opinionated Jupyter to Markdown conversion. This means
|
6
|
+
that the entire file is readable on the diff.
|
7
|
+
|
8
|
+
The resulting diffs are much easier to read:
|
9
|
+
|
10
|
+
| Diff | IpynbDiff - HTML | IpynbDiff - Percent |
|
11
|
+
| ------ | ------ | ------ |
|
12
|
+
| [Here](example/diff.txt) | [Here](example/ipynbdiff.txt) | [Here](example/ipynbdiff_percent.txt) |
|
13
|
+
|  |  |  |
|
14
|
+
|
15
|
+
|
16
|
+
This started as a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff),
|
17
|
+
but now has extended functionality, although it does not work as a git driver.
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
### Generating diffs
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
IpynbDiff.diff(from_path, to_path, options)
|
25
|
+
```
|
26
|
+
|
27
|
+
Options:
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
@default_diff_options = {
|
31
|
+
preprocess_input: TRUE, # Whether the input should be transformed
|
32
|
+
write_output_to: nil, # Pass a path to save the output to a file
|
33
|
+
format: :text, # These are the formats Diffy accepts https://github.com/samg/diffy
|
34
|
+
sources_are_files: FALSE, # Whether to use the from/to as string or path to a file
|
35
|
+
transform_options: @default_transform_options, # See below for transform options
|
36
|
+
diff_opts: {
|
37
|
+
include_diff_info: FALSE # These are passed to Diffy https://github.com/samg/diffy
|
38
|
+
}
|
39
|
+
}
|
40
|
+
```
|
41
|
+
|
42
|
+
### Transforming the notebooks
|
43
|
+
|
44
|
+
It might be necessary to have the transformed files in addition to the diff.
|
45
|
+
|
46
|
+
```ruby
|
47
|
+
IpynbDiff.transform(notebook, options)
|
48
|
+
```
|
49
|
+
|
50
|
+
Options:
|
51
|
+
|
52
|
+
```ruby
|
53
|
+
@default_transform_options = {
|
54
|
+
include_metadata: FALSE, # Whether to include the notebook metadata (kernel, language, etc)
|
55
|
+
cell_decorator: :html # :html is useful to add styling with css, :percent is better for text format
|
56
|
+
}
|
57
|
+
```
|
data/ipynbdiff.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = 'ipynbdiff'
|
5
|
-
s.version = '0.3.3'
|
5
|
+
s.version = '0.3.4'
|
6
6
|
s.summary = 'Human Readable diffs for Jupyter Notebooks'
|
7
7
|
s.description = 'Better diff for Jupyter Notebooks by first preprocessing them and removing clutter'
|
8
8
|
s.authors = ['Eduardo Bonet']
|
data/lib/ipynbdiff.rb
CHANGED
@@ -2,15 +2,20 @@
|
|
2
2
|
|
3
3
|
# Human Readable Jupyter Diffs
|
4
4
|
module IpynbDiff
|
5
|
-
require 'transformer
|
5
|
+
require 'transformer'
|
6
6
|
require 'diffy'
|
7
7
|
|
8
|
-
@
|
8
|
+
@default_transform_options = {
|
9
|
+
include_metadata: FALSE,
|
10
|
+
cell_decorator: :html
|
11
|
+
}
|
12
|
+
|
13
|
+
@default_diff_options = {
|
9
14
|
preprocess_input: TRUE,
|
10
15
|
write_output_to: nil,
|
11
16
|
format: :text,
|
12
17
|
sources_are_files: FALSE,
|
13
|
-
|
18
|
+
transform_options: @default_transform_options,
|
14
19
|
diff_opts: {
|
15
20
|
include_diff_info: FALSE
|
16
21
|
}
|
@@ -19,7 +24,7 @@ module IpynbDiff
|
|
19
24
|
def self.prepare_input(to_prepare, options)
|
20
25
|
prepared = to_prepare
|
21
26
|
prepared = File.read(prepared) if options[:sources_are_files]
|
22
|
-
prepared =
|
27
|
+
prepared = transform(prepared, options[:transform_options]) if options[:preprocess_input]
|
23
28
|
|
24
29
|
prepared
|
25
30
|
end
|
@@ -29,7 +34,7 @@ module IpynbDiff
|
|
29
34
|
to_notebook,
|
30
35
|
options = {}
|
31
36
|
)
|
32
|
-
options = @
|
37
|
+
options = @default_diff_options.merge(options)
|
33
38
|
|
34
39
|
from = from_notebook && prepare_input(from_notebook, options) || ''
|
35
40
|
to = to_notebook && prepare_input(to_notebook, options) || ''
|
@@ -40,4 +45,9 @@ module IpynbDiff
|
|
40
45
|
|
41
46
|
d
|
42
47
|
end
|
48
|
+
|
49
|
+
def self.transform(notebook, options)
|
50
|
+
options = @default_transform_options.merge(options)
|
51
|
+
Transformer.new(**options).transform(notebook)
|
52
|
+
end
|
43
53
|
end
|
data/lib/transformer.rb
CHANGED
@@ -6,88 +6,101 @@ module IpynbDiff
|
|
6
6
|
require 'json'
|
7
7
|
require 'yaml'
|
8
8
|
|
9
|
-
|
9
|
+
@cell_decorator = :html
|
10
|
+
@include_metadata = TRUE
|
11
|
+
|
12
|
+
def initialize(include_metadata: TRUE, cell_decorator: :html)
|
13
|
+
@include_metadata = include_metadata
|
14
|
+
@cell_decorator = cell_decorator
|
15
|
+
end
|
16
|
+
|
17
|
+
def transform(notebook)
|
10
18
|
notebook_json = JSON.parse(notebook)
|
11
|
-
transformed_blocks = notebook_json['cells'].map
|
12
|
-
|
19
|
+
transformed_blocks = notebook_json['cells'].map do |cell|
|
20
|
+
decorate_cell(transform_cell(cell, notebook_json), cell)
|
21
|
+
end
|
22
|
+
|
23
|
+
transformed_blocks.prepend(transform_metadata(notebook_json)) if @include_metadata
|
13
24
|
transformed_blocks.join("\n")
|
14
25
|
end
|
15
26
|
|
16
|
-
def
|
27
|
+
def decorate_cell(rows, cell)
|
28
|
+
tags = cell['metadata']&.fetch('tags', [])
|
29
|
+
type = cell['cell_type']
|
30
|
+
|
31
|
+
case @cell_decorator
|
32
|
+
when :html
|
33
|
+
rows.prepend(%(<div class="cell #{type}" data-id="#{cell['id']}" data-tags="#{tags&.join(' ')}">\n\n))
|
34
|
+
.append("\n</div>\n")
|
35
|
+
when :percent
|
36
|
+
rows.prepend(%(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}\n\n))
|
37
|
+
else
|
38
|
+
rows
|
39
|
+
end.join('')
|
40
|
+
end
|
41
|
+
|
42
|
+
def transform_cell(cell, notebook)
|
17
43
|
cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook) : transform_text_cell(cell)
|
18
44
|
end
|
19
45
|
|
20
|
-
def
|
21
|
-
|
46
|
+
def decorate_output(output_rows, output)
|
47
|
+
if @cell_decorator == :html
|
48
|
+
output_rows.prepend(%(\n<div class="output #{output['output_type']}">\n\n)).append("\n</div>\n")
|
49
|
+
else
|
50
|
+
output_rows.prepend(%(\n%%%% Output: #{output['output_type']}\n\n))
|
51
|
+
end
|
52
|
+
end
|
22
53
|
|
54
|
+
def transform_code_cell(cell, notebook)
|
23
55
|
[
|
24
|
-
%(<div class="cell code" data-id="#{cell['id']}" data-tags="#{tags}">\n\n),
|
25
56
|
%(``` #{notebook['metadata']['kernelspec']['language']}\n),
|
26
57
|
*cell['source'],
|
27
58
|
"\n```\n",
|
28
|
-
*cell['outputs'].map { |output| transform_output(output) }
|
29
|
-
|
30
|
-
].join('')
|
59
|
+
*cell['outputs'].map { |output| transform_output(output) }
|
60
|
+
]
|
31
61
|
end
|
32
62
|
|
33
|
-
def
|
63
|
+
def format_traceback(traceback)
|
34
64
|
traceback.map do |t|
|
35
65
|
t.split("\n").map do |line|
|
36
|
-
line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip
|
66
|
+
line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip << "\n"
|
37
67
|
end
|
38
|
-
end
|
68
|
+
end
|
39
69
|
end
|
40
70
|
|
41
|
-
def
|
42
|
-
[
|
43
|
-
%(\n<div class="output execute_result">\n\n),
|
44
|
-
*output['data']['text/plain'].map { |line| " #{line}" },
|
45
|
-
"\n\n</div>\n"
|
46
|
-
].join('')
|
71
|
+
def transform_execute_result(output)
|
72
|
+
output['data']['text/plain'].map { |line| " #{line}" }.append("\n")
|
47
73
|
end
|
48
74
|
|
49
|
-
def
|
75
|
+
def transform_image_result(output)
|
50
76
|
if output['data'].key?('image/png')
|
51
|
-
[
|
52
|
-
%(\n<div class="output display_data">\n\n),
|
53
|
-
"})",
|
54
|
-
"\n\n</div>\n"
|
55
|
-
].join('')
|
77
|
+
["})", "\n"]
|
56
78
|
end
|
57
79
|
end
|
58
80
|
|
59
|
-
def
|
60
|
-
[
|
61
|
-
%(\n<div class="output error">\n\n),
|
62
|
-
format_traceback(output['traceback']),
|
63
|
-
"\n\n</div>\n"
|
64
|
-
].join('')
|
81
|
+
def transform_error_result(output)
|
82
|
+
format_traceback(output['traceback'])
|
65
83
|
end
|
66
84
|
|
67
|
-
def
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
85
|
+
def transform_output(output)
|
86
|
+
transformed =
|
87
|
+
case output['output_type']
|
88
|
+
when 'execute_result'
|
89
|
+
transform_execute_result(output)
|
90
|
+
when 'display_data'
|
91
|
+
transform_image_result(output)
|
92
|
+
when 'error'
|
93
|
+
transform_error_result(output)
|
94
|
+
end
|
77
95
|
|
78
|
-
|
79
|
-
|
80
|
-
id = cell['id']
|
81
|
-
cell_type = cell['cell_type']
|
96
|
+
decorate_output(transformed, output).join('') if transformed
|
97
|
+
end
|
82
98
|
|
83
|
-
|
84
|
-
|
85
|
-
*cell['source'],
|
86
|
-
"\n\n</div>\n"
|
87
|
-
].join('')
|
99
|
+
def transform_text_cell(cell)
|
100
|
+
cell['source'].append("\n")
|
88
101
|
end
|
89
102
|
|
90
|
-
def
|
103
|
+
def transform_metadata(notebook_json)
|
91
104
|
{
|
92
105
|
'jupyter' => {
|
93
106
|
'kernelspec' => notebook_json['metadata']['kernelspec'],
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ipynbdiff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.3
|
4
|
+
version: 0.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eduardo Bonet
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-10-
|
11
|
+
date: 2021-10-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: diffy
|