ipynbdiff 0.3.9 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitlab-ci.yml +6 -3
- data/.rubocop.yml +1 -0
- data/.rubocop_todo.yml +31 -0
- data/Gemfile +2 -1
- data/Gemfile.lock +20 -1
- data/README.md +5 -6
- data/ipynbdiff.gemspec +7 -3
- data/lib/diff.rb +20 -0
- data/lib/ipynb_symbol_map.rb +151 -0
- data/lib/ipynbdiff.rb +8 -43
- data/lib/output_transformer.rb +40 -29
- data/lib/symbolized_markdown_helper.rb +30 -0
- data/lib/transformed_notebook.rb +20 -0
- data/lib/transformer.rb +50 -46
- data/lib/version.rb +1 -1
- metadata +24 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8f722c49e19511d249e90f620b67e69869d6dbd226c280f007b21b68e8675482
|
|
4
|
+
data.tar.gz: b77b6733387649d2b9ad26fc9143d4520a54901bca63b9f94b6e70e67c02288b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2a5c9d3ba57aee0bfab6f6e435804ce70f7128cf3e19cb04396fcf286a3a7b8fc3f0f451d2c4d4cf91c52dc0013fe5b55b3aa169744801037e0505689be61792
|
|
7
|
+
data.tar.gz: 977baf80af614a98af4405816c8776fa9e9ac7323a5886ec5ff7af8e4b3c151aeff1b2b2398cbdc5d1f5d4f815065b974292527980a13c7ac7eeb2077f036a73
|
data/.gitlab-ci.yml
CHANGED
|
@@ -15,11 +15,11 @@ build-gem:
|
|
|
15
15
|
stage: build
|
|
16
16
|
script:
|
|
17
17
|
- bundle install
|
|
18
|
-
- cat .VERSION.TMPL | sed s/GEM_VERSION
|
|
18
|
+
- cat .VERSION.TMPL | sed s/GEM_VERSION/0.0.0/ > lib/version.rb
|
|
19
19
|
- gem build ipynbdiff.gemspec
|
|
20
20
|
artifacts:
|
|
21
21
|
paths:
|
|
22
|
-
- ipynbdiff
|
|
22
|
+
- ipynbdiff-0.0.0.gem
|
|
23
23
|
needs:
|
|
24
24
|
- specs
|
|
25
25
|
|
|
@@ -27,10 +27,13 @@ deploy-gem:
|
|
|
27
27
|
stage: rubygems
|
|
28
28
|
script:
|
|
29
29
|
- bundle install
|
|
30
|
+
- cat .VERSION.TMPL | sed s/GEM_VERSION/$CI_COMMIT_TAG/ > lib/version.rb
|
|
31
|
+
- gem build ipynbdiff.gemspec
|
|
30
32
|
- gem push ipynbdiff-$CI_COMMIT_TAG.gem
|
|
31
33
|
only:
|
|
32
|
-
- master
|
|
33
34
|
- tags
|
|
35
|
+
except:
|
|
36
|
+
- branches
|
|
34
37
|
needs:
|
|
35
38
|
- build-gem
|
|
36
39
|
when: manual
|
data/.rubocop.yml
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
inherit_from: .rubocop_todo.yml
|
data/.rubocop_todo.yml
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# This configuration was generated by
|
|
2
|
+
# `rubocop --auto-gen-config`
|
|
3
|
+
# on 2021-12-22 14:13:29 UTC using RuboCop version 1.23.0.
|
|
4
|
+
# The point is for the user to remove these configuration records
|
|
5
|
+
# one by one as the offenses are removed from the code base.
|
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
|
8
|
+
|
|
9
|
+
# Offense count: 1
|
|
10
|
+
# Configuration parameters: Include.
|
|
11
|
+
# Include: **/*.gemspec
|
|
12
|
+
Gemspec/RequiredRubyVersion:
|
|
13
|
+
Exclude:
|
|
14
|
+
- 'ipynbdiff.gemspec'
|
|
15
|
+
|
|
16
|
+
AllCops:
|
|
17
|
+
NewCops: enable
|
|
18
|
+
|
|
19
|
+
Style/StringConcatenation:
|
|
20
|
+
Enabled: false
|
|
21
|
+
|
|
22
|
+
# Offense count: 6
|
|
23
|
+
# Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
|
|
24
|
+
# IgnoredMethods: refine
|
|
25
|
+
Metrics/BlockLength:
|
|
26
|
+
Enabled: false
|
|
27
|
+
|
|
28
|
+
# Offense count: 3
|
|
29
|
+
# Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
|
|
30
|
+
Metrics/MethodLength:
|
|
31
|
+
Enabled: false
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
|
@@ -1,9 +1,18 @@
|
|
|
1
1
|
GEM
|
|
2
2
|
remote: https://rubygems.org/
|
|
3
3
|
specs:
|
|
4
|
+
ast (2.4.2)
|
|
5
|
+
binding_ninja (0.2.3)
|
|
6
|
+
coderay (1.1.3)
|
|
4
7
|
diff-lcs (1.4.4)
|
|
5
8
|
diffy (3.3.0)
|
|
6
9
|
json (2.5.1)
|
|
10
|
+
parser (3.0.2.0)
|
|
11
|
+
ast (~> 2.4.1)
|
|
12
|
+
proc_to_ast (0.1.0)
|
|
13
|
+
coderay
|
|
14
|
+
parser
|
|
15
|
+
unparser
|
|
7
16
|
rspec (3.10.0)
|
|
8
17
|
rspec-core (~> 3.10.0)
|
|
9
18
|
rspec-expectations (~> 3.10.0)
|
|
@@ -16,7 +25,16 @@ GEM
|
|
|
16
25
|
rspec-mocks (3.10.2)
|
|
17
26
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
18
27
|
rspec-support (~> 3.10.0)
|
|
28
|
+
rspec-parameterized (0.5.0)
|
|
29
|
+
binding_ninja (>= 0.2.3)
|
|
30
|
+
parser
|
|
31
|
+
proc_to_ast
|
|
32
|
+
rspec (>= 2.13, < 4)
|
|
33
|
+
unparser
|
|
19
34
|
rspec-support (3.10.2)
|
|
35
|
+
unparser (0.6.0)
|
|
36
|
+
diff-lcs (~> 1.3)
|
|
37
|
+
parser (>= 3.0.0)
|
|
20
38
|
|
|
21
39
|
PLATFORMS
|
|
22
40
|
ruby
|
|
@@ -26,6 +44,7 @@ DEPENDENCIES
|
|
|
26
44
|
diffy (= 3.3.0)
|
|
27
45
|
json (= 2.5.1)
|
|
28
46
|
rspec (= 3.10.0)
|
|
47
|
+
rspec-parameterized (= 0.5.0)
|
|
29
48
|
|
|
30
49
|
BUNDLED WITH
|
|
31
|
-
2.2.
|
|
50
|
+
2.2.30
|
data/README.md
CHANGED
|
@@ -7,10 +7,10 @@ that the entire file is readable on the diff.
|
|
|
7
7
|
|
|
8
8
|
The results are diffs that are much easier to read:
|
|
9
9
|
|
|
10
|
-
| Diff |
|
|
11
|
-
| ------ |
|
|
12
|
-
| [Here](example/diff.txt) |
|
|
13
|
-
|  |
|
|
10
|
+
| Diff | IpynbDiff |
|
|
11
|
+
| ------ | ------ |
|
|
12
|
+
| [Here](example/diff.txt) | [Here](example/ipynbdiff_percent.txt) |
|
|
13
|
+
|  |  |
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
This started as a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff),
|
|
@@ -52,7 +52,6 @@ Options:
|
|
|
52
52
|
|
|
53
53
|
```ruby
|
|
54
54
|
@default_transform_options = {
|
|
55
|
-
|
|
56
|
-
cell_decorator: :html # :html is useful to add styling with css, :percent is better for text format
|
|
55
|
+
include_frontmatter: false, # Whether or not to include the notebook metadata (kernel, language, etc.)
|
|
57
56
|
}
|
|
58
57
|
```
|
data/ipynbdiff.gemspec
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require_relative
|
|
3
|
+
require_relative 'lib/version'
|
|
4
4
|
|
|
5
5
|
Gem::Specification.new do |s|
|
|
6
6
|
s.name = 'ipynbdiff'
|
|
@@ -11,10 +11,10 @@ Gem::Specification.new do |s|
|
|
|
11
11
|
s.email = 'ebonet@gitlab.com'
|
|
12
12
|
# Specify which files should be added to the gem when it is released.
|
|
13
13
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
|
14
|
-
s.files
|
|
14
|
+
s.files = Dir.chdir(File.expand_path(__dir__)) do
|
|
15
15
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(spec|example)/}) }
|
|
16
16
|
end
|
|
17
|
-
s.homepage
|
|
17
|
+
s.homepage =
|
|
18
18
|
'https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff'
|
|
19
19
|
s.license = 'MIT'
|
|
20
20
|
|
|
@@ -28,4 +28,8 @@ Gem::Specification.new do |s|
|
|
|
28
28
|
s.add_development_dependency 'pry'
|
|
29
29
|
s.add_development_dependency 'rake'
|
|
30
30
|
s.add_development_dependency 'rspec'
|
|
31
|
+
s.add_development_dependency 'rspec-parametized'
|
|
32
|
+
s.metadata = {
|
|
33
|
+
'rubygems_mfa_required' => 'true'
|
|
34
|
+
}
|
|
31
35
|
end
|
data/lib/diff.rb
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Custom differ for Jupyter Notebooks
|
|
4
|
+
module IpynbDiff
|
|
5
|
+
require 'delegate'
|
|
6
|
+
|
|
7
|
+
# The result of a diff object
|
|
8
|
+
class Diff < SimpleDelegator
|
|
9
|
+
require 'diffy'
|
|
10
|
+
|
|
11
|
+
attr_reader :from, :to
|
|
12
|
+
|
|
13
|
+
def initialize(from, to, diffy_opts)
|
|
14
|
+
super(Diffy::Diff.new(from.as_text, to.as_text, **diffy_opts))
|
|
15
|
+
|
|
16
|
+
@from = from
|
|
17
|
+
@to = to
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module IpynbDiff
|
|
4
|
+
class InvalidTokenError < StandardError
|
|
5
|
+
end
|
|
6
|
+
|
|
7
|
+
# Creates a symbol map for an ipynb file (JSON format)
|
|
8
|
+
class IpynbSymbolMap
|
|
9
|
+
class << self
|
|
10
|
+
def parse(notebook)
|
|
11
|
+
IpynbSymbolMap.new(notebook).parse('')
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
attr_reader :current_line, :char_idx, :results
|
|
16
|
+
|
|
17
|
+
WHITESPACE_CHARS = ["\t", "\r", ' ', "\n"].freeze
|
|
18
|
+
|
|
19
|
+
VALUE_STOPPERS = [',', '[', ']', '{', '}', *WHITESPACE_CHARS].freeze
|
|
20
|
+
|
|
21
|
+
def initialize(notebook)
|
|
22
|
+
@chars = notebook.chars
|
|
23
|
+
@current_line = 0
|
|
24
|
+
@char_idx = 0
|
|
25
|
+
@results = {}
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def parse(prefix = '.')
|
|
29
|
+
skip_whitespaces
|
|
30
|
+
|
|
31
|
+
if (c = current_char) == '"'
|
|
32
|
+
parse_string
|
|
33
|
+
elsif c == '['
|
|
34
|
+
parse_array(prefix)
|
|
35
|
+
elsif c == '{'
|
|
36
|
+
parse_object(prefix)
|
|
37
|
+
else
|
|
38
|
+
parse_value
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
results
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
def parse_array(prefix)
|
|
45
|
+
# [1, 2, {"some": "object"}, [1]]
|
|
46
|
+
|
|
47
|
+
i = 0
|
|
48
|
+
|
|
49
|
+
current_should_be '['
|
|
50
|
+
|
|
51
|
+
loop do
|
|
52
|
+
break if skip_beginning(']')
|
|
53
|
+
|
|
54
|
+
new_prefix = "#{prefix}.#{i}"
|
|
55
|
+
|
|
56
|
+
add_result(new_prefix, current_line)
|
|
57
|
+
|
|
58
|
+
parse(new_prefix)
|
|
59
|
+
|
|
60
|
+
i += 1
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
def parse_object(prefix)
|
|
65
|
+
# {"name":"value", "another_name": [1, 2, 3]}
|
|
66
|
+
|
|
67
|
+
current_should_be '{'
|
|
68
|
+
|
|
69
|
+
loop do
|
|
70
|
+
break if skip_beginning('}')
|
|
71
|
+
|
|
72
|
+
prop_name = parse_string
|
|
73
|
+
|
|
74
|
+
new_prefix = "#{prefix}.#{prop_name}"
|
|
75
|
+
|
|
76
|
+
add_result(new_prefix, current_line)
|
|
77
|
+
|
|
78
|
+
next_and_skip_whitespaces
|
|
79
|
+
|
|
80
|
+
current_should_be ':'
|
|
81
|
+
|
|
82
|
+
next_and_skip_whitespaces
|
|
83
|
+
|
|
84
|
+
parse(new_prefix)
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
def parse_string
|
|
89
|
+
value = ''
|
|
90
|
+
prev_char = nil
|
|
91
|
+
|
|
92
|
+
current_should_be '"'
|
|
93
|
+
|
|
94
|
+
loop do
|
|
95
|
+
increment_char_index
|
|
96
|
+
break if (c = current_char) == '"' && prev_char != '\\'
|
|
97
|
+
|
|
98
|
+
value += (prev_char = c)
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
value
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
def add_result(key, line_number)
|
|
105
|
+
@results[key] = line_number
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
def parse_value
|
|
109
|
+
increment_char_index until VALUE_STOPPERS.include?(current_char)
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
def skip_whitespaces
|
|
113
|
+
while WHITESPACE_CHARS.include?(current_char)
|
|
114
|
+
check_for_new_line
|
|
115
|
+
increment_char_index
|
|
116
|
+
end
|
|
117
|
+
end
|
|
118
|
+
|
|
119
|
+
def increment_char_index
|
|
120
|
+
@char_idx += 1
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
def next_and_skip_whitespaces
|
|
124
|
+
increment_char_index
|
|
125
|
+
skip_whitespaces
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
def current_char
|
|
129
|
+
@chars[@char_idx]
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
def current_should_be(another_char)
|
|
133
|
+
raise InvalidTokenError unless current_char == another_char
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
def check_for_new_line
|
|
137
|
+
@current_line += 1 if current_char == "\n"
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
def skip_beginning(closing_char)
|
|
141
|
+
|
|
142
|
+
check_for_new_line
|
|
143
|
+
|
|
144
|
+
next_and_skip_whitespaces
|
|
145
|
+
|
|
146
|
+
return true if current_char == closing_char
|
|
147
|
+
|
|
148
|
+
next_and_skip_whitespaces if current_char == ','
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
end
|
data/lib/ipynbdiff.rb
CHANGED
|
@@ -3,55 +3,20 @@
|
|
|
3
3
|
# Human Readable Jupyter Diffs
|
|
4
4
|
module IpynbDiff
|
|
5
5
|
require 'transformer'
|
|
6
|
-
require '
|
|
6
|
+
require 'diff'
|
|
7
7
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
cell_decorator: :html
|
|
11
|
-
}
|
|
8
|
+
def self.diff(from, to, raise_if_invalid_nb: false, include_frontmatter: false, diffy_opts: {})
|
|
9
|
+
transformer = Transformer.new(include_frontmatter: include_frontmatter)
|
|
12
10
|
|
|
13
|
-
|
|
14
|
-
preprocess_input: true,
|
|
15
|
-
write_output_to: nil,
|
|
16
|
-
format: :text,
|
|
17
|
-
sources_are_files: false,
|
|
18
|
-
raise_if_invalid_notebook: false,
|
|
19
|
-
transform_options: @default_transform_options,
|
|
20
|
-
diff_opts: {
|
|
21
|
-
include_diff_info: false
|
|
22
|
-
}
|
|
23
|
-
}.freeze
|
|
24
|
-
|
|
25
|
-
def self.prepare_input(to_prepare, options)
|
|
26
|
-
return '' unless to_prepare
|
|
27
|
-
|
|
28
|
-
prep = to_prepare
|
|
29
|
-
prep = File.read(prep) if options[:sources_are_files]
|
|
30
|
-
prep = transform(prep, raise_errors: true, options: options[:transform_options]) if options[:preprocess_input]
|
|
31
|
-
prep
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
def self.diff(
|
|
35
|
-
from_notebook,
|
|
36
|
-
to_notebook,
|
|
37
|
-
options = @default_diff_options
|
|
38
|
-
)
|
|
39
|
-
options = @default_diff_options.merge(options)
|
|
40
|
-
|
|
41
|
-
from = prepare_input(from_notebook, options)
|
|
42
|
-
to = prepare_input(to_notebook, options)
|
|
43
|
-
|
|
44
|
-
d = Diffy::Diff.new(from, to, **options[:diff_opts]).to_s(options[:format])
|
|
45
|
-
File.write(options[:write_output_to], d) if options[:write_output_to]
|
|
46
|
-
d
|
|
11
|
+
Diff.new(transformer.transform(from), transformer.transform(to), diffy_opts)
|
|
47
12
|
rescue InvalidNotebookError
|
|
48
|
-
raise if
|
|
13
|
+
raise if raise_if_invalid_nb
|
|
49
14
|
end
|
|
50
15
|
|
|
51
|
-
def self.transform(notebook, raise_errors: false,
|
|
52
|
-
|
|
16
|
+
def self.transform(notebook, raise_errors: false, include_frontmatter: true)
|
|
17
|
+
return unless notebook
|
|
53
18
|
|
|
54
|
-
Transformer.new(
|
|
19
|
+
Transformer.new(include_frontmatter: include_frontmatter).transform(notebook).as_text
|
|
55
20
|
rescue InvalidNotebookError
|
|
56
21
|
raise if raise_errors
|
|
57
22
|
end
|
data/lib/output_transformer.rb
CHANGED
|
@@ -1,65 +1,76 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module IpynbDiff
|
|
4
|
-
|
|
5
4
|
# Transforms Jupyter output data into markdown
|
|
6
5
|
class OutputTransformer
|
|
6
|
+
require 'symbolized_markdown_helper'
|
|
7
|
+
include SymbolizedMarkdownHelper
|
|
7
8
|
|
|
8
9
|
ORDERED_KEYS = {
|
|
9
10
|
'execute_result' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex text/plain],
|
|
10
11
|
'display_data' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex]
|
|
11
12
|
}.freeze
|
|
12
13
|
|
|
13
|
-
def transform(output)
|
|
14
|
-
case (output_type = output['output_type'])
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
14
|
+
def transform(output, symbol)
|
|
15
|
+
transformed = case (output_type = output['output_type'])
|
|
16
|
+
when 'error'
|
|
17
|
+
transform_error(output['traceback'], symbol / 'traceback')
|
|
18
|
+
when 'execute_result', 'display_data'
|
|
19
|
+
transform_non_error(ORDERED_KEYS[output_type], output['data'], symbol / 'data')
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
decorate_output(transformed, output, symbol) if transformed
|
|
20
23
|
end
|
|
21
24
|
|
|
22
|
-
def
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
25
|
+
def decorate_output(output_rows, output, symbol)
|
|
26
|
+
[
|
|
27
|
+
_,
|
|
28
|
+
symbol, %(%%%% Output: #{output['output_type']}),
|
|
29
|
+
_,
|
|
30
|
+
*output_rows
|
|
31
|
+
]
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def transform_error(traceback, symbol)
|
|
35
|
+
traceback.map.with_index do |t, idx|
|
|
36
|
+
t.split("\n").map do |l|
|
|
37
|
+
[symbol / idx, l.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip]
|
|
26
38
|
end
|
|
27
39
|
end
|
|
28
40
|
end
|
|
29
41
|
|
|
30
|
-
def transform_non_error(accepted_keys, elements)
|
|
31
|
-
accepted_keys.map do |key|
|
|
32
|
-
transform_element(key, elements[key]
|
|
33
|
-
end
|
|
42
|
+
def transform_non_error(accepted_keys, elements, symbol)
|
|
43
|
+
accepted_keys.filter { |key| elements.key?(key) }.map do |key|
|
|
44
|
+
transform_element(key, elements[key], symbol)
|
|
45
|
+
end
|
|
34
46
|
end
|
|
35
47
|
|
|
36
|
-
def transform_element(output_type, output_element)
|
|
48
|
+
def transform_element(output_type, output_element, symbol_prefix)
|
|
49
|
+
new_symbol = symbol_prefix / output_type
|
|
37
50
|
case output_type
|
|
38
51
|
when 'image/png', 'image/jpeg'
|
|
39
|
-
transform_image(output_type, output_element)
|
|
52
|
+
transform_image(output_type, output_element, new_symbol)
|
|
40
53
|
when 'image/svg+xml'
|
|
41
|
-
transform_svg(output_element)
|
|
54
|
+
transform_svg(output_element, new_symbol)
|
|
42
55
|
when 'text/markdown', 'text/latex', 'text/plain'
|
|
43
|
-
transform_text(output_element)
|
|
56
|
+
transform_text(output_element, new_symbol)
|
|
44
57
|
end
|
|
45
58
|
end
|
|
46
59
|
|
|
47
|
-
def transform_image(image_type, image_content)
|
|
48
|
-
[" })"
|
|
60
|
+
def transform_image(image_type, image_content, symbol)
|
|
61
|
+
[symbol, " })"]
|
|
49
62
|
end
|
|
50
63
|
|
|
51
|
-
def transform_svg(image_content)
|
|
64
|
+
def transform_svg(image_content, symbol)
|
|
52
65
|
lines = image_content.is_a?(Array) ? image_content : [image_content]
|
|
53
66
|
|
|
54
|
-
single_line = lines.map(&:strip).join
|
|
67
|
+
single_line = lines.map(&:strip).join.gsub(/\s+/, ' ')
|
|
55
68
|
|
|
56
|
-
[" "
|
|
69
|
+
[symbol, " "]
|
|
57
70
|
end
|
|
58
71
|
|
|
59
|
-
def transform_text(text_content)
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
lines.map { |line| " #{line}" }.append("\n")
|
|
72
|
+
def transform_text(text_content, symbol)
|
|
73
|
+
symbolize_array(symbol, text_content) { |l| " #{l.rstrip}" }
|
|
63
74
|
end
|
|
64
75
|
end
|
|
65
76
|
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module IpynbDiff
|
|
4
|
+
# Helper functions
|
|
5
|
+
module SymbolizedMarkdownHelper
|
|
6
|
+
|
|
7
|
+
def _(content = '')
|
|
8
|
+
[nil, content]
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def array_if_not_array(thing)
|
|
12
|
+
thing.is_a?(Array) ? thing : [thing]
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def symbolize_array(symbol, content, &block)
|
|
16
|
+
if content.is_a?(Array)
|
|
17
|
+
content.map.with_index { |l, idx| [symbol / idx, block.call(l)] }
|
|
18
|
+
else
|
|
19
|
+
[symbol, content]
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
# Simple wrapper for a string
|
|
25
|
+
class JsonSymbol < String
|
|
26
|
+
def /(other)
|
|
27
|
+
JsonSymbol.new((other.is_a?(Array) ? [self, *other] : [self, other]).join('.'))
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module IpynbDiff
|
|
4
|
+
# Notebook that was transformed into md, including location of source cells
|
|
5
|
+
class TransformedNotebook
|
|
6
|
+
attr_reader :blocks
|
|
7
|
+
|
|
8
|
+
def as_text
|
|
9
|
+
@blocks.map { |b| b[:content] }.join("\n")
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
private
|
|
13
|
+
|
|
14
|
+
def initialize(lines = [], symbols = [], symbol_map = {})
|
|
15
|
+
@blocks = lines.zip(symbols).map do |line, symbol|
|
|
16
|
+
{ content: line, source_symbol: symbol, source_line: symbol && symbol_map[symbol] }
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
data/lib/transformer.rb
CHANGED
|
@@ -9,14 +9,15 @@ module IpynbDiff
|
|
|
9
9
|
require 'json'
|
|
10
10
|
require 'yaml'
|
|
11
11
|
require 'output_transformer'
|
|
12
|
+
require 'symbolized_markdown_helper'
|
|
13
|
+
require 'ipynb_symbol_map'
|
|
14
|
+
require 'transformed_notebook'
|
|
15
|
+
include SymbolizedMarkdownHelper
|
|
12
16
|
|
|
13
|
-
@
|
|
14
|
-
@include_metadata = true
|
|
17
|
+
@include_frontmatter = true
|
|
15
18
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
@include_metadata = include_metadata
|
|
19
|
-
@cell_decorator = cell_decorator
|
|
19
|
+
def initialize(include_frontmatter: true)
|
|
20
|
+
@include_frontmatter = include_frontmatter
|
|
20
21
|
@output_transformer = OutputTransformer.new
|
|
21
22
|
end
|
|
22
23
|
|
|
@@ -31,71 +32,74 @@ module IpynbDiff
|
|
|
31
32
|
end
|
|
32
33
|
|
|
33
34
|
def transform(notebook)
|
|
34
|
-
|
|
35
|
-
transformed_blocks = notebook_json['cells'].map do |cell|
|
|
36
|
-
decorate_cell(transform_cell(cell, notebook_json), cell)
|
|
37
|
-
end
|
|
35
|
+
return TransformedNotebook.new unless notebook
|
|
38
36
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
37
|
+
notebook_json = validate_notebook(notebook)
|
|
38
|
+
transformed = transform_document(notebook_json)
|
|
39
|
+
symbol_map = IpynbSymbolMap.parse(notebook)
|
|
42
40
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
41
|
+
symbols, lines = if transformed && !transformed.empty?
|
|
42
|
+
transformed.partition.each_with_index { |_el, i| i.even? }
|
|
43
|
+
else
|
|
44
|
+
[[], []]
|
|
45
|
+
end
|
|
46
46
|
|
|
47
|
-
|
|
48
|
-
when :html
|
|
49
|
-
rows.prepend(%(<div class="cell #{type}" data-id="#{cell['id']}" data-tags="#{tags&.join(' ')}">\n\n))
|
|
50
|
-
.append("\n</div>\n")
|
|
51
|
-
when :percent
|
|
52
|
-
rows.prepend(%(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}\n\n))
|
|
53
|
-
else
|
|
54
|
-
rows
|
|
55
|
-
end.join('')
|
|
47
|
+
TransformedNotebook.new(lines, symbols, symbol_map)
|
|
56
48
|
end
|
|
57
49
|
|
|
58
|
-
def
|
|
59
|
-
|
|
60
|
-
end
|
|
50
|
+
def transform_document(notebook)
|
|
51
|
+
symbol = JsonSymbol.new('.cells')
|
|
61
52
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
output_rows.prepend(%(\n<div class="output #{output['output_type']}">\n\n)).append("\n</div>\n")
|
|
65
|
-
else
|
|
66
|
-
output_rows.prepend(%(\n%%%% Output: #{output['output_type']}\n\n))
|
|
53
|
+
transformed_blocks = notebook['cells'].map.with_index do |cell, idx|
|
|
54
|
+
decorate_cell(transform_cell(cell, notebook, symbol / idx), cell, symbol / idx)
|
|
67
55
|
end
|
|
56
|
+
|
|
57
|
+
transformed_blocks.prepend(transform_metadata(notebook)) if @include_frontmatter
|
|
58
|
+
transformed_blocks.flatten
|
|
68
59
|
end
|
|
69
60
|
|
|
70
|
-
def
|
|
61
|
+
def decorate_cell(rows, cell, symbol)
|
|
62
|
+
tags = cell['metadata']&.fetch('tags', [])
|
|
63
|
+
type = cell['cell_type'] || 'raw'
|
|
64
|
+
|
|
71
65
|
[
|
|
72
|
-
%(
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
66
|
+
symbol, %(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}),
|
|
67
|
+
_,
|
|
68
|
+
*rows,
|
|
69
|
+
_
|
|
76
70
|
]
|
|
77
71
|
end
|
|
78
72
|
|
|
79
|
-
def
|
|
80
|
-
|
|
73
|
+
def transform_cell(cell, notebook, symbol)
|
|
74
|
+
cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook, symbol) : transform_text_cell(cell, symbol)
|
|
75
|
+
end
|
|
81
76
|
|
|
82
|
-
|
|
77
|
+
def transform_code_cell(cell, notebook, symbol)
|
|
78
|
+
[
|
|
79
|
+
symbol / 'source', %(``` #{notebook.dig('metadata', 'kernelspec', 'language') || ''}),
|
|
80
|
+
symbolize_array(symbol / 'source', cell['source'], &:rstrip),
|
|
81
|
+
_('```'),
|
|
82
|
+
cell['outputs'].map.with_index do |output, idx|
|
|
83
|
+
@output_transformer.transform(output, symbol / ['outputs', idx])
|
|
84
|
+
end
|
|
85
|
+
]
|
|
83
86
|
end
|
|
84
87
|
|
|
85
|
-
def transform_text_cell(cell)
|
|
86
|
-
source
|
|
87
|
-
(source.is_a?(Array) ? source : [source]).append("\n")
|
|
88
|
+
def transform_text_cell(cell, symbol)
|
|
89
|
+
symbolize_array(symbol / 'source', cell['source'], &:rstrip)
|
|
88
90
|
end
|
|
89
91
|
|
|
90
92
|
def transform_metadata(notebook_json)
|
|
91
|
-
{
|
|
93
|
+
as_yaml = {
|
|
92
94
|
'jupyter' => {
|
|
93
95
|
'kernelspec' => notebook_json['metadata']['kernelspec'],
|
|
94
96
|
'language_info' => notebook_json['metadata']['language_info'],
|
|
95
97
|
'nbformat' => notebook_json['nbformat'],
|
|
96
98
|
'nbformat_minor' => notebook_json['nbformat_minor']
|
|
97
99
|
}
|
|
98
|
-
}.to_yaml
|
|
100
|
+
}.to_yaml
|
|
101
|
+
|
|
102
|
+
as_yaml.split("\n").map { |l| _(l) }.append(_('---'), _)
|
|
99
103
|
end
|
|
100
104
|
end
|
|
101
105
|
end
|
data/lib/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ipynbdiff
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Eduardo Bonet
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2022-01-12 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: diffy
|
|
@@ -114,6 +114,20 @@ dependencies:
|
|
|
114
114
|
- - ">="
|
|
115
115
|
- !ruby/object:Gem::Version
|
|
116
116
|
version: '0'
|
|
117
|
+
- !ruby/object:Gem::Dependency
|
|
118
|
+
name: rspec-parametized
|
|
119
|
+
requirement: !ruby/object:Gem::Requirement
|
|
120
|
+
requirements:
|
|
121
|
+
- - ">="
|
|
122
|
+
- !ruby/object:Gem::Version
|
|
123
|
+
version: '0'
|
|
124
|
+
type: :development
|
|
125
|
+
prerelease: false
|
|
126
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
127
|
+
requirements:
|
|
128
|
+
- - ">="
|
|
129
|
+
- !ruby/object:Gem::Version
|
|
130
|
+
version: '0'
|
|
117
131
|
description: Better diff for Jupyter Notebooks by first preprocessing them and removing
|
|
118
132
|
clutter
|
|
119
133
|
email: ebonet@gitlab.com
|
|
@@ -124,18 +138,25 @@ files:
|
|
|
124
138
|
- ".VERSION.TMPL"
|
|
125
139
|
- ".gitignore"
|
|
126
140
|
- ".gitlab-ci.yml"
|
|
141
|
+
- ".rubocop.yml"
|
|
142
|
+
- ".rubocop_todo.yml"
|
|
127
143
|
- Gemfile
|
|
128
144
|
- Gemfile.lock
|
|
129
145
|
- README.md
|
|
130
146
|
- ipynbdiff.gemspec
|
|
147
|
+
- lib/diff.rb
|
|
148
|
+
- lib/ipynb_symbol_map.rb
|
|
131
149
|
- lib/ipynbdiff.rb
|
|
132
150
|
- lib/output_transformer.rb
|
|
151
|
+
- lib/symbolized_markdown_helper.rb
|
|
152
|
+
- lib/transformed_notebook.rb
|
|
133
153
|
- lib/transformer.rb
|
|
134
154
|
- lib/version.rb
|
|
135
155
|
homepage: https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff
|
|
136
156
|
licenses:
|
|
137
157
|
- MIT
|
|
138
|
-
metadata:
|
|
158
|
+
metadata:
|
|
159
|
+
rubygems_mfa_required: 'true'
|
|
139
160
|
post_install_message:
|
|
140
161
|
rdoc_options: []
|
|
141
162
|
require_paths:
|