ipynbdiff 0.3.7 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.VERSION.TMPL +5 -0
- data/.gitlab-ci.yml +37 -1
- data/.rubocop.yml +1 -0
- data/.rubocop_todo.yml +31 -0
- data/Gemfile +2 -1
- data/Gemfile.lock +21 -1
- data/README.md +5 -6
- data/ipynbdiff.gemspec +11 -5
- data/lib/diff.rb +20 -0
- data/lib/ipynb_symbol_map.rb +151 -0
- data/lib/ipynbdiff.rb +8 -43
- data/lib/output_transformer.rb +42 -27
- data/lib/symbolized_markdown_helper.rb +30 -0
- data/lib/transformed_notebook.rb +20 -0
- data/lib/transformer.rb +51 -46
- data/lib/version.rb +5 -0
- metadata +38 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 97969c0adb99db01eecef0831de1cbdb6eb1d2bfc58233195c76d475c8c4a97f
|
4
|
+
data.tar.gz: e04aa8f6b704f85d539eb6b59021f67ad3ca1c032a8fd3051115545828ad2c67
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9699f6249f6c47df87d0854069da21a484a91ef5974cd58570660e48bee7cadda633c7497396875e21d83221b00eaa8b8eb7d1e502340b0dec78718e7735d8b0
|
7
|
+
data.tar.gz: 63a1153092296010189a76dc3c9eadb6cf421603b6199189a9d8681048820fc607123aa41a65c08169a8497e7a28795a0325f790fd792105374268d0996e5e0a
|
data/.VERSION.TMPL
ADDED
data/.gitlab-ci.yml
CHANGED
@@ -1,6 +1,42 @@
|
|
1
|
+
image: ruby:2.7
|
2
|
+
|
3
|
+
stages:
|
4
|
+
- test
|
5
|
+
- build
|
6
|
+
- rubygems
|
7
|
+
|
1
8
|
specs:
|
2
9
|
stage: test
|
3
|
-
image: ruby:2.7
|
4
10
|
script:
|
5
11
|
- bundle install
|
6
12
|
- bundle exec rspec
|
13
|
+
|
14
|
+
build-gem:
|
15
|
+
stage: build
|
16
|
+
script:
|
17
|
+
- bundle install
|
18
|
+
- cat .VERSION.TMPL | sed s/GEM_VERSION/0.0.0/ > lib/version.rb
|
19
|
+
- gem build ipynbdiff.gemspec
|
20
|
+
artifacts:
|
21
|
+
paths:
|
22
|
+
- ipynbdiff-0.0.0.gem
|
23
|
+
needs:
|
24
|
+
- specs
|
25
|
+
|
26
|
+
deploy-gem:
|
27
|
+
stage: rubygems
|
28
|
+
script:
|
29
|
+
- bundle install
|
30
|
+
- cat .VERSION.TMPL | sed s/GEM_VERSION/$CI_COMMIT_TAG/ > lib/version.rb
|
31
|
+
- gem build ipynbdiff.gemspec
|
32
|
+
- gem push ipynbdiff-$CI_COMMIT_TAG.gem
|
33
|
+
only:
|
34
|
+
- tags
|
35
|
+
except:
|
36
|
+
- branches
|
37
|
+
needs:
|
38
|
+
- build-gem
|
39
|
+
when: manual
|
40
|
+
|
41
|
+
|
42
|
+
|
data/.rubocop.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
inherit_from: .rubocop_todo.yml
|
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2021-12-22 14:13:29 UTC using RuboCop version 1.23.0.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 1
|
10
|
+
# Configuration parameters: Include.
|
11
|
+
# Include: **/*.gemspec
|
12
|
+
Gemspec/RequiredRubyVersion:
|
13
|
+
Exclude:
|
14
|
+
- 'ipynbdiff.gemspec'
|
15
|
+
|
16
|
+
AllCops:
|
17
|
+
NewCops: enable
|
18
|
+
|
19
|
+
Style/StringConcatenation:
|
20
|
+
Enabled: false
|
21
|
+
|
22
|
+
# Offense count: 6
|
23
|
+
# Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
|
24
|
+
# IgnoredMethods: refine
|
25
|
+
Metrics/BlockLength:
|
26
|
+
Enabled: false
|
27
|
+
|
28
|
+
# Offense count: 3
|
29
|
+
# Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
|
30
|
+
Metrics/MethodLength:
|
31
|
+
Enabled: false
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,9 +1,18 @@
|
|
1
1
|
GEM
|
2
2
|
remote: https://rubygems.org/
|
3
3
|
specs:
|
4
|
+
ast (2.4.2)
|
5
|
+
binding_ninja (0.2.3)
|
6
|
+
coderay (1.1.3)
|
4
7
|
diff-lcs (1.4.4)
|
5
8
|
diffy (3.3.0)
|
6
9
|
json (2.5.1)
|
10
|
+
parser (3.0.2.0)
|
11
|
+
ast (~> 2.4.1)
|
12
|
+
proc_to_ast (0.1.0)
|
13
|
+
coderay
|
14
|
+
parser
|
15
|
+
unparser
|
7
16
|
rspec (3.10.0)
|
8
17
|
rspec-core (~> 3.10.0)
|
9
18
|
rspec-expectations (~> 3.10.0)
|
@@ -16,15 +25,26 @@ GEM
|
|
16
25
|
rspec-mocks (3.10.2)
|
17
26
|
diff-lcs (>= 1.2.0, < 2.0)
|
18
27
|
rspec-support (~> 3.10.0)
|
28
|
+
rspec-parameterized (0.5.0)
|
29
|
+
binding_ninja (>= 0.2.3)
|
30
|
+
parser
|
31
|
+
proc_to_ast
|
32
|
+
rspec (>= 2.13, < 4)
|
33
|
+
unparser
|
19
34
|
rspec-support (3.10.2)
|
35
|
+
unparser (0.6.0)
|
36
|
+
diff-lcs (~> 1.3)
|
37
|
+
parser (>= 3.0.0)
|
20
38
|
|
21
39
|
PLATFORMS
|
40
|
+
ruby
|
22
41
|
x86_64-darwin-20
|
23
42
|
|
24
43
|
DEPENDENCIES
|
25
44
|
diffy (= 3.3.0)
|
26
45
|
json (= 2.5.1)
|
27
46
|
rspec (= 3.10.0)
|
47
|
+
rspec-parameterized (= 0.5.0)
|
28
48
|
|
29
49
|
BUNDLED WITH
|
30
|
-
2.2.
|
50
|
+
2.2.30
|
data/README.md
CHANGED
@@ -7,10 +7,10 @@ that the entire file is readable on the diff.
|
|
7
7
|
|
8
8
|
The result are diffs that are much easier to read:
|
9
9
|
|
10
|
-
| Diff |
|
11
|
-
| ------ |
|
12
|
-
| [Here](example/diff.txt) |
|
13
|
-
|  |
|
10
|
+
| Diff | | IpynbDiff |
|
11
|
+
| ------ | ------ |
|
12
|
+
| [Here](example/diff.txt) | [Here](example/ipynbdiff_percent.txt) |
|
13
|
+
|  |  |
|
14
14
|
|
15
15
|
|
16
16
|
This started as a port of This is a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff),
|
@@ -52,7 +52,6 @@ Options:
|
|
52
52
|
|
53
53
|
```ruby
|
54
54
|
@default_transform_options = {
|
55
|
-
|
56
|
-
cell_decorator: :html # :html is useful to add styling with css, :percent is better for text format
|
55
|
+
include_frontmatter: false, # Whether to include or not the notebook metadata (kernel, language, etc)
|
57
56
|
}
|
58
57
|
```
|
data/ipynbdiff.gemspec
CHANGED
@@ -1,29 +1,35 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require_relative 'lib/version'
|
4
|
+
|
3
5
|
Gem::Specification.new do |s|
|
4
6
|
s.name = 'ipynbdiff'
|
5
|
-
s.version =
|
7
|
+
s.version = IpynbDiff::VERSION
|
6
8
|
s.summary = 'Human Readable diffs for Jupyter Notebooks'
|
7
9
|
s.description = 'Better diff for Jupyter Notebooks by first preprocessing them and removing clutter'
|
8
10
|
s.authors = ['Eduardo Bonet']
|
9
11
|
s.email = 'ebonet@gitlab.com'
|
10
12
|
# Specify which files should be added to the gem when it is released.
|
11
13
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
12
|
-
s.files
|
14
|
+
s.files = Dir.chdir(File.expand_path(__dir__)) do
|
13
15
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(spec|example)/}) }
|
14
16
|
end
|
15
|
-
s.homepage
|
17
|
+
s.homepage =
|
16
18
|
'https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff'
|
17
19
|
s.license = 'MIT'
|
18
20
|
|
19
21
|
s.require_paths = ['lib']
|
20
22
|
|
21
|
-
s.add_runtime_dependency 'diffy', '3.3
|
22
|
-
s.add_runtime_dependency 'json', '2.5.1'
|
23
|
+
s.add_runtime_dependency 'diffy', '~> 3.3'
|
24
|
+
s.add_runtime_dependency 'json', '~> 2.5', '>= 2.5.1'
|
23
25
|
|
24
26
|
s.add_development_dependency 'bundler', '~> 2.2'
|
25
27
|
s.add_development_dependency 'guard-rspec'
|
26
28
|
s.add_development_dependency 'pry'
|
27
29
|
s.add_development_dependency 'rake'
|
28
30
|
s.add_development_dependency 'rspec'
|
31
|
+
s.add_development_dependency 'rspec-parametized'
|
32
|
+
s.metadata = {
|
33
|
+
'rubygems_mfa_required' => 'true'
|
34
|
+
}
|
29
35
|
end
|
data/lib/diff.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Custom differ for Jupyter Notebooks
|
4
|
+
module IpynbDiff
|
5
|
+
require 'delegate'
|
6
|
+
|
7
|
+
# The result of a diff object
|
8
|
+
class Diff < SimpleDelegator
|
9
|
+
require 'diffy'
|
10
|
+
|
11
|
+
attr_reader :from, :to
|
12
|
+
|
13
|
+
def initialize(from, to, diffy_opts)
|
14
|
+
super(Diffy::Diff.new(from.as_text, to.as_text, **diffy_opts))
|
15
|
+
|
16
|
+
@from = from
|
17
|
+
@to = to
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,151 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module IpynbDiff
|
4
|
+
class InvalidTokenError < StandardError
|
5
|
+
end
|
6
|
+
|
7
|
+
# Creates a symbol map for a ipynb file (JSON format)
|
8
|
+
class IpynbSymbolMap
|
9
|
+
class << self
|
10
|
+
def parse(notebook)
|
11
|
+
IpynbSymbolMap.new(notebook).parse('')
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
attr_reader :current_line, :char_idx, :results
|
16
|
+
|
17
|
+
WHITESPACE_CHARS = ["\t", "\r", ' ', "\n"].freeze
|
18
|
+
|
19
|
+
VALUE_STOPPERS = [',', '[', ']', '{', '}', *WHITESPACE_CHARS].freeze
|
20
|
+
|
21
|
+
def initialize(notebook)
|
22
|
+
@chars = notebook.chars
|
23
|
+
@current_line = 0
|
24
|
+
@char_idx = 0
|
25
|
+
@results = {}
|
26
|
+
end
|
27
|
+
|
28
|
+
def parse(prefix = '.')
|
29
|
+
skip_whitespaces
|
30
|
+
|
31
|
+
if (c = current_char) == '"'
|
32
|
+
parse_string
|
33
|
+
elsif c == '['
|
34
|
+
parse_array(prefix)
|
35
|
+
elsif c == '{'
|
36
|
+
parse_object(prefix)
|
37
|
+
else
|
38
|
+
parse_value
|
39
|
+
end
|
40
|
+
|
41
|
+
results
|
42
|
+
end
|
43
|
+
|
44
|
+
def parse_array(prefix)
|
45
|
+
# [1, 2, {"some": "object"}, [1]]
|
46
|
+
|
47
|
+
i = 0
|
48
|
+
|
49
|
+
current_should_be '['
|
50
|
+
|
51
|
+
loop do
|
52
|
+
break if skip_beginning(']')
|
53
|
+
|
54
|
+
new_prefix = "#{prefix}.#{i}"
|
55
|
+
|
56
|
+
add_result(new_prefix, current_line)
|
57
|
+
|
58
|
+
parse(new_prefix)
|
59
|
+
|
60
|
+
i += 1
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def parse_object(prefix)
|
65
|
+
# {"name":"value", "another_name": [1, 2, 3]}
|
66
|
+
|
67
|
+
current_should_be '{'
|
68
|
+
|
69
|
+
loop do
|
70
|
+
break if skip_beginning('}')
|
71
|
+
|
72
|
+
prop_name = parse_string
|
73
|
+
|
74
|
+
new_prefix = "#{prefix}.#{prop_name}"
|
75
|
+
|
76
|
+
add_result(new_prefix, current_line)
|
77
|
+
|
78
|
+
next_and_skip_whitespaces
|
79
|
+
|
80
|
+
current_should_be ':'
|
81
|
+
|
82
|
+
next_and_skip_whitespaces
|
83
|
+
|
84
|
+
parse(new_prefix)
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
def parse_string
|
89
|
+
value = ''
|
90
|
+
prev_char = nil
|
91
|
+
|
92
|
+
current_should_be '"'
|
93
|
+
|
94
|
+
loop do
|
95
|
+
increment_char_index
|
96
|
+
break if (c = current_char) == '"' && prev_char != '\\'
|
97
|
+
|
98
|
+
value += (prev_char = c)
|
99
|
+
end
|
100
|
+
|
101
|
+
value
|
102
|
+
end
|
103
|
+
|
104
|
+
def add_result(key, line_number)
|
105
|
+
@results[key] = line_number
|
106
|
+
end
|
107
|
+
|
108
|
+
def parse_value
|
109
|
+
increment_char_index until VALUE_STOPPERS.include?(current_char)
|
110
|
+
end
|
111
|
+
|
112
|
+
def skip_whitespaces
|
113
|
+
while WHITESPACE_CHARS.include?(current_char)
|
114
|
+
check_for_new_line
|
115
|
+
increment_char_index
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
def increment_char_index
|
120
|
+
@char_idx += 1
|
121
|
+
end
|
122
|
+
|
123
|
+
def next_and_skip_whitespaces
|
124
|
+
increment_char_index
|
125
|
+
skip_whitespaces
|
126
|
+
end
|
127
|
+
|
128
|
+
def current_char
|
129
|
+
@chars[@char_idx]
|
130
|
+
end
|
131
|
+
|
132
|
+
def current_should_be(another_char)
|
133
|
+
raise InvalidTokenError unless current_char == another_char
|
134
|
+
end
|
135
|
+
|
136
|
+
def check_for_new_line
|
137
|
+
@current_line += 1 if current_char == "\n"
|
138
|
+
end
|
139
|
+
|
140
|
+
def skip_beginning(closing_char)
|
141
|
+
|
142
|
+
check_for_new_line
|
143
|
+
|
144
|
+
next_and_skip_whitespaces
|
145
|
+
|
146
|
+
return true if current_char == closing_char
|
147
|
+
|
148
|
+
next_and_skip_whitespaces if current_char == ','
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
data/lib/ipynbdiff.rb
CHANGED
@@ -3,55 +3,20 @@
|
|
3
3
|
# Human Readable Jupyter Diffs
|
4
4
|
module IpynbDiff
|
5
5
|
require 'transformer'
|
6
|
-
require '
|
6
|
+
require 'diff'
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
cell_decorator: :html
|
11
|
-
}
|
8
|
+
def self.diff(from, to, raise_if_invalid_nb: false, include_frontmatter: false, diffy_opts: {})
|
9
|
+
transformer = Transformer.new(include_frontmatter: include_frontmatter)
|
12
10
|
|
13
|
-
|
14
|
-
preprocess_input: true,
|
15
|
-
write_output_to: nil,
|
16
|
-
format: :text,
|
17
|
-
sources_are_files: false,
|
18
|
-
raise_if_invalid_notebook: false,
|
19
|
-
transform_options: @default_transform_options,
|
20
|
-
diff_opts: {
|
21
|
-
include_diff_info: false
|
22
|
-
}
|
23
|
-
}.freeze
|
24
|
-
|
25
|
-
def self.prepare_input(to_prepare, options)
|
26
|
-
return '' unless to_prepare
|
27
|
-
|
28
|
-
prep = to_prepare
|
29
|
-
prep = File.read(prep) if options[:sources_are_files]
|
30
|
-
prep = transform(prep, raise_errors: true, options: options[:transform_options]) if options[:preprocess_input]
|
31
|
-
prep
|
32
|
-
end
|
33
|
-
|
34
|
-
def self.diff(
|
35
|
-
from_notebook,
|
36
|
-
to_notebook,
|
37
|
-
options = @default_diff_options
|
38
|
-
)
|
39
|
-
options = @default_diff_options.merge(options)
|
40
|
-
|
41
|
-
from = prepare_input(from_notebook, options)
|
42
|
-
to = prepare_input(to_notebook, options)
|
43
|
-
|
44
|
-
d = Diffy::Diff.new(from, to, **options[:diff_opts]).to_s(options[:format])
|
45
|
-
File.write(options[:write_output_to], d) if options[:write_output_to]
|
46
|
-
d
|
11
|
+
Diff.new(transformer.transform(from), transformer.transform(to), diffy_opts)
|
47
12
|
rescue InvalidNotebookError
|
48
|
-
raise if
|
13
|
+
raise if raise_if_invalid_nb
|
49
14
|
end
|
50
15
|
|
51
|
-
def self.transform(notebook, raise_errors: false,
|
52
|
-
|
16
|
+
def self.transform(notebook, raise_errors: false, include_frontmatter: true)
|
17
|
+
return unless notebook
|
53
18
|
|
54
|
-
Transformer.new(
|
19
|
+
Transformer.new(include_frontmatter: include_frontmatter).transform(notebook).as_text
|
55
20
|
rescue InvalidNotebookError
|
56
21
|
raise if raise_errors
|
57
22
|
end
|
data/lib/output_transformer.rb
CHANGED
@@ -1,61 +1,76 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module IpynbDiff
|
4
|
-
|
5
4
|
# Transforms Jupyter output data into markdown
|
6
5
|
class OutputTransformer
|
6
|
+
require 'symbolized_markdown_helper'
|
7
|
+
include SymbolizedMarkdownHelper
|
7
8
|
|
8
9
|
ORDERED_KEYS = {
|
9
10
|
'execute_result' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex text/plain],
|
10
11
|
'display_data' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex]
|
11
12
|
}.freeze
|
12
13
|
|
13
|
-
def transform(output)
|
14
|
-
case (output_type = output['output_type'])
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
14
|
+
def transform(output, symbol)
|
15
|
+
transformed = case (output_type = output['output_type'])
|
16
|
+
when 'error'
|
17
|
+
transform_error(output['traceback'], symbol / 'traceback')
|
18
|
+
when 'execute_result', 'display_data'
|
19
|
+
transform_non_error(ORDERED_KEYS[output_type], output['data'], symbol / 'data')
|
20
|
+
end
|
21
|
+
|
22
|
+
decorate_output(transformed, output, symbol) if transformed
|
20
23
|
end
|
21
24
|
|
22
|
-
def
|
23
|
-
|
24
|
-
|
25
|
-
|
25
|
+
def decorate_output(output_rows, output, symbol)
|
26
|
+
[
|
27
|
+
_,
|
28
|
+
symbol, %(%%%% Output: #{output['output_type']}),
|
29
|
+
_,
|
30
|
+
*output_rows
|
31
|
+
]
|
32
|
+
end
|
33
|
+
|
34
|
+
def transform_error(traceback, symbol)
|
35
|
+
traceback.map.with_index do |t, idx|
|
36
|
+
t.split("\n").map do |l|
|
37
|
+
[symbol / idx, l.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip]
|
26
38
|
end
|
27
39
|
end
|
28
40
|
end
|
29
41
|
|
30
|
-
def transform_non_error(accepted_keys, elements)
|
31
|
-
accepted_keys.map do |key|
|
32
|
-
transform_element(key, elements[key]
|
33
|
-
end
|
42
|
+
def transform_non_error(accepted_keys, elements, symbol)
|
43
|
+
accepted_keys.filter { |key| elements.key?(key) }.map do |key|
|
44
|
+
transform_element(key, elements[key], symbol)
|
45
|
+
end
|
34
46
|
end
|
35
47
|
|
36
|
-
def transform_element(output_type, output_element)
|
48
|
+
def transform_element(output_type, output_element, symbol_prefix)
|
49
|
+
new_symbol = symbol_prefix / output_type
|
37
50
|
case output_type
|
38
51
|
when 'image/png', 'image/jpeg'
|
39
|
-
transform_image(output_type, output_element)
|
52
|
+
transform_image(output_type, output_element, new_symbol)
|
40
53
|
when 'image/svg+xml'
|
41
|
-
transform_svg(output_element)
|
54
|
+
transform_svg(output_element, new_symbol)
|
42
55
|
when 'text/markdown', 'text/latex', 'text/plain'
|
43
|
-
transform_text(output_element)
|
56
|
+
transform_text(output_element, new_symbol)
|
44
57
|
end
|
45
58
|
end
|
46
59
|
|
47
|
-
def transform_image(image_type, image_content)
|
48
|
-
[" })"
|
60
|
+
def transform_image(image_type, image_content, symbol)
|
61
|
+
[symbol, " })"]
|
49
62
|
end
|
50
63
|
|
51
|
-
def transform_svg(image_content)
|
52
|
-
|
64
|
+
def transform_svg(image_content, symbol)
|
65
|
+
lines = image_content.is_a?(Array) ? image_content : [image_content]
|
66
|
+
|
67
|
+
single_line = lines.map(&:strip).join.gsub(/\s+/, ' ')
|
53
68
|
|
54
|
-
[" "
|
69
|
+
[symbol, " "]
|
55
70
|
end
|
56
71
|
|
57
|
-
def transform_text(text_content)
|
58
|
-
text_content
|
72
|
+
def transform_text(text_content, symbol)
|
73
|
+
symbolize_array(symbol, text_content) { |l| " #{l.rstrip}" }
|
59
74
|
end
|
60
75
|
end
|
61
76
|
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module IpynbDiff
|
4
|
+
# Helper functions
|
5
|
+
module SymbolizedMarkdownHelper
|
6
|
+
|
7
|
+
def _(content = '')
|
8
|
+
[nil, content]
|
9
|
+
end
|
10
|
+
|
11
|
+
def array_if_not_array(thing)
|
12
|
+
thing.is_a?(Array) ? thing : [thing]
|
13
|
+
end
|
14
|
+
|
15
|
+
def symbolize_array(symbol, content, &block)
|
16
|
+
if content.is_a?(Array)
|
17
|
+
content.map.with_index { |l, idx| [symbol / idx, block.call(l)] }
|
18
|
+
else
|
19
|
+
[symbol, content]
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
# Simple wrapper for a string
|
25
|
+
class JsonSymbol < String
|
26
|
+
def /(other)
|
27
|
+
JsonSymbol.new((other.is_a?(Array) ? [self, *other] : [self, other]).join('.'))
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module IpynbDiff
|
4
|
+
# Notebook that was transformed into md, including location of source cells
|
5
|
+
class TransformedNotebook
|
6
|
+
attr_reader :blocks
|
7
|
+
|
8
|
+
def as_text
|
9
|
+
@blocks.map { |b| b[:content] }.join("\n")
|
10
|
+
end
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
def initialize(lines = [], symbols = [], symbol_map = {})
|
15
|
+
@blocks = lines.zip(symbols).map do |line, symbol|
|
16
|
+
{ content: line, source_symbol: symbol, source_line: symbol && symbol_map[symbol] }
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
data/lib/transformer.rb
CHANGED
@@ -9,21 +9,22 @@ module IpynbDiff
|
|
9
9
|
require 'json'
|
10
10
|
require 'yaml'
|
11
11
|
require 'output_transformer'
|
12
|
+
require 'symbolized_markdown_helper'
|
13
|
+
require 'ipynb_symbol_map'
|
14
|
+
require 'transformed_notebook'
|
15
|
+
include SymbolizedMarkdownHelper
|
12
16
|
|
13
|
-
@
|
14
|
-
@include_metadata = true
|
17
|
+
@include_frontmatter = true
|
15
18
|
|
16
|
-
|
17
|
-
|
18
|
-
@include_metadata = include_metadata
|
19
|
-
@cell_decorator = cell_decorator
|
19
|
+
def initialize(include_frontmatter: true)
|
20
|
+
@include_frontmatter = include_frontmatter
|
20
21
|
@output_transformer = OutputTransformer.new
|
21
22
|
end
|
22
23
|
|
23
24
|
def validate_notebook(notebook)
|
24
25
|
notebook_json = JSON.parse(notebook)
|
25
26
|
|
26
|
-
return notebook_json if notebook_json.key?('cells')
|
27
|
+
return notebook_json if notebook_json.key?('cells')
|
27
28
|
|
28
29
|
raise InvalidNotebookError
|
29
30
|
rescue JSON::ParserError
|
@@ -31,70 +32,74 @@ module IpynbDiff
|
|
31
32
|
end
|
32
33
|
|
33
34
|
def transform(notebook)
|
34
|
-
|
35
|
-
transformed_blocks = notebook_json['cells'].map do |cell|
|
36
|
-
decorate_cell(transform_cell(cell, notebook_json), cell)
|
37
|
-
end
|
35
|
+
return TransformedNotebook.new unless notebook
|
38
36
|
|
39
|
-
|
40
|
-
|
41
|
-
|
37
|
+
notebook_json = validate_notebook(notebook)
|
38
|
+
transformed = transform_document(notebook_json)
|
39
|
+
symbol_map = IpynbSymbolMap.parse(notebook)
|
42
40
|
|
43
|
-
|
44
|
-
|
45
|
-
|
41
|
+
symbols, lines = if transformed && !transformed.empty?
|
42
|
+
transformed.partition.each_with_index { |_el, i| i.even? }
|
43
|
+
else
|
44
|
+
[[], []]
|
45
|
+
end
|
46
46
|
|
47
|
-
|
48
|
-
when :html
|
49
|
-
rows.prepend(%(<div class="cell #{type}" data-id="#{cell['id']}" data-tags="#{tags&.join(' ')}">\n\n))
|
50
|
-
.append("\n</div>\n")
|
51
|
-
when :percent
|
52
|
-
rows.prepend(%(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}\n\n))
|
53
|
-
else
|
54
|
-
rows
|
55
|
-
end.join('')
|
47
|
+
TransformedNotebook.new(lines, symbols, symbol_map)
|
56
48
|
end
|
57
49
|
|
58
|
-
def
|
59
|
-
|
60
|
-
end
|
50
|
+
def transform_document(notebook)
|
51
|
+
symbol = JsonSymbol.new('.cells')
|
61
52
|
|
62
|
-
|
63
|
-
|
64
|
-
output_rows.prepend(%(\n<div class="output #{output['output_type']}">\n\n)).append("\n</div>\n")
|
65
|
-
else
|
66
|
-
output_rows.prepend(%(\n%%%% Output: #{output['output_type']}\n\n))
|
53
|
+
transformed_blocks = notebook['cells'].map.with_index do |cell, idx|
|
54
|
+
decorate_cell(transform_cell(cell, notebook, symbol / idx), cell, symbol / idx)
|
67
55
|
end
|
56
|
+
|
57
|
+
transformed_blocks.prepend(transform_metadata(notebook)) if @include_frontmatter
|
58
|
+
transformed_blocks.flatten
|
68
59
|
end
|
69
60
|
|
70
|
-
def
|
61
|
+
def decorate_cell(rows, cell, symbol)
|
62
|
+
tags = cell['metadata']&.fetch('tags', [])
|
63
|
+
type = cell['cell_type'] || 'raw'
|
64
|
+
|
71
65
|
[
|
72
|
-
%(
|
73
|
-
|
74
|
-
|
75
|
-
|
66
|
+
symbol, %(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}),
|
67
|
+
_,
|
68
|
+
*rows,
|
69
|
+
_
|
76
70
|
]
|
77
71
|
end
|
78
72
|
|
79
|
-
def
|
80
|
-
|
73
|
+
def transform_cell(cell, notebook, symbol)
|
74
|
+
cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook, symbol) : transform_text_cell(cell, symbol)
|
75
|
+
end
|
81
76
|
|
82
|
-
|
77
|
+
def transform_code_cell(cell, notebook, symbol)
|
78
|
+
[
|
79
|
+
symbol / 'source', %(``` #{notebook.dig('metadata', 'kernelspec', 'language') || ''}),
|
80
|
+
symbolize_array(symbol / 'source', cell['source'], &:rstrip),
|
81
|
+
_('```'),
|
82
|
+
cell['outputs'].map.with_index do |output, idx|
|
83
|
+
@output_transformer.transform(output, symbol / ['outputs', idx])
|
84
|
+
end
|
85
|
+
]
|
83
86
|
end
|
84
87
|
|
85
|
-
def transform_text_cell(cell)
|
86
|
-
cell['source']
|
88
|
+
def transform_text_cell(cell, symbol)
|
89
|
+
symbolize_array(symbol / 'source', cell['source'], &:rstrip)
|
87
90
|
end
|
88
91
|
|
89
92
|
def transform_metadata(notebook_json)
|
90
|
-
{
|
93
|
+
as_yaml = {
|
91
94
|
'jupyter' => {
|
92
95
|
'kernelspec' => notebook_json['metadata']['kernelspec'],
|
93
96
|
'language_info' => notebook_json['metadata']['language_info'],
|
94
97
|
'nbformat' => notebook_json['nbformat'],
|
95
98
|
'nbformat_minor' => notebook_json['nbformat_minor']
|
96
99
|
}
|
97
|
-
}.to_yaml
|
100
|
+
}.to_yaml
|
101
|
+
|
102
|
+
as_yaml.split("\n").map { |l| _(l) }.append(_('---'), _)
|
98
103
|
end
|
99
104
|
end
|
100
105
|
end
|
data/lib/version.rb
ADDED
metadata
CHANGED
@@ -1,41 +1,47 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ipynbdiff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eduardo Bonet
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: diffy
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 3.3
|
19
|
+
version: '3.3'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 3.3
|
26
|
+
version: '3.3'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: json
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '2.5'
|
34
|
+
- - ">="
|
32
35
|
- !ruby/object:Gem::Version
|
33
36
|
version: 2.5.1
|
34
37
|
type: :runtime
|
35
38
|
prerelease: false
|
36
39
|
version_requirements: !ruby/object:Gem::Requirement
|
37
40
|
requirements:
|
38
|
-
- -
|
41
|
+
- - "~>"
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '2.5'
|
44
|
+
- - ">="
|
39
45
|
- !ruby/object:Gem::Version
|
40
46
|
version: 2.5.1
|
41
47
|
- !ruby/object:Gem::Dependency
|
@@ -108,6 +114,20 @@ dependencies:
|
|
108
114
|
- - ">="
|
109
115
|
- !ruby/object:Gem::Version
|
110
116
|
version: '0'
|
117
|
+
- !ruby/object:Gem::Dependency
|
118
|
+
name: rspec-parametized
|
119
|
+
requirement: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - ">="
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '0'
|
124
|
+
type: :development
|
125
|
+
prerelease: false
|
126
|
+
version_requirements: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - ">="
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '0'
|
111
131
|
description: Better diff for Jupyter Notebooks by first preprocessing them and removing
|
112
132
|
clutter
|
113
133
|
email: ebonet@gitlab.com
|
@@ -115,19 +135,28 @@ executables: []
|
|
115
135
|
extensions: []
|
116
136
|
extra_rdoc_files: []
|
117
137
|
files:
|
138
|
+
- ".VERSION.TMPL"
|
118
139
|
- ".gitignore"
|
119
140
|
- ".gitlab-ci.yml"
|
141
|
+
- ".rubocop.yml"
|
142
|
+
- ".rubocop_todo.yml"
|
120
143
|
- Gemfile
|
121
144
|
- Gemfile.lock
|
122
145
|
- README.md
|
123
146
|
- ipynbdiff.gemspec
|
147
|
+
- lib/diff.rb
|
148
|
+
- lib/ipynb_symbol_map.rb
|
124
149
|
- lib/ipynbdiff.rb
|
125
150
|
- lib/output_transformer.rb
|
151
|
+
- lib/symbolized_markdown_helper.rb
|
152
|
+
- lib/transformed_notebook.rb
|
126
153
|
- lib/transformer.rb
|
154
|
+
- lib/version.rb
|
127
155
|
homepage: https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff
|
128
156
|
licenses:
|
129
157
|
- MIT
|
130
|
-
metadata:
|
158
|
+
metadata:
|
159
|
+
rubygems_mfa_required: 'true'
|
131
160
|
post_install_message:
|
132
161
|
rdoc_options: []
|
133
162
|
require_paths:
|