ipynbdiff 0.3.7 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 24615c877db39efeb4b4d0effe014db5a7734f6be88385aaf73bc00622075ca8
4
- data.tar.gz: 2aca335aa15c5e413eaa23d197f3f9a0f73d6415adbafcb419cf7764eb983e89
3
+ metadata.gz: 97969c0adb99db01eecef0831de1cbdb6eb1d2bfc58233195c76d475c8c4a97f
4
+ data.tar.gz: e04aa8f6b704f85d539eb6b59021f67ad3ca1c032a8fd3051115545828ad2c67
5
5
  SHA512:
6
- metadata.gz: 280688c5f5f722cea1868963dcf49eefdb3c35374fad96522d7f8b8a7c77b902d11db1b2b5dd1cde3370065159cee4f1c1f252b1a15f585c589abdc9ede02e1f
7
- data.tar.gz: 50de3ac58248b4af0e67d2e68a476a3c8395753a47638c7ca2015d7c909d55031f605f5334be54d909fd40e5728912623909b7fc3f73f8544b0828dd1e9e6a1a
6
+ metadata.gz: 9699f6249f6c47df87d0854069da21a484a91ef5974cd58570660e48bee7cadda633c7497396875e21d83221b00eaa8b8eb7d1e502340b0dec78718e7735d8b0
7
+ data.tar.gz: 63a1153092296010189a76dc3c9eadb6cf421603b6199189a9d8681048820fc607123aa41a65c08169a8497e7a28795a0325f790fd792105374268d0996e5e0a
data/.VERSION.TMPL ADDED
@@ -0,0 +1,5 @@
1
+ # lib/version.rb
2
+
3
+ module IpynbDiff
4
+ VERSION = "GEM_VERSION"
5
+ end
data/.gitlab-ci.yml CHANGED
@@ -1,6 +1,42 @@
1
+ image: ruby:2.7
2
+
3
+ stages:
4
+ - test
5
+ - build
6
+ - rubygems
7
+
1
8
  specs:
2
9
  stage: test
3
- image: ruby:2.7
4
10
  script:
5
11
  - bundle install
6
12
  - bundle exec rspec
13
+
14
+ build-gem:
15
+ stage: build
16
+ script:
17
+ - bundle install
18
+ - cat .VERSION.TMPL | sed s/GEM_VERSION/0.0.0/ > lib/version.rb
19
+ - gem build ipynbdiff.gemspec
20
+ artifacts:
21
+ paths:
22
+ - ipynbdiff-0.0.0.gem
23
+ needs:
24
+ - specs
25
+
26
+ deploy-gem:
27
+ stage: rubygems
28
+ script:
29
+ - bundle install
30
+ - cat .VERSION.TMPL | sed s/GEM_VERSION/$CI_COMMIT_TAG/ > lib/version.rb
31
+ - gem build ipynbdiff.gemspec
32
+ - gem push ipynbdiff-$CI_COMMIT_TAG.gem
33
+ only:
34
+ - tags
35
+ except:
36
+ - branches
37
+ needs:
38
+ - build-gem
39
+ when: manual
40
+
41
+
42
+
data/.rubocop.yml ADDED
@@ -0,0 +1 @@
1
+ inherit_from: .rubocop_todo.yml
data/.rubocop_todo.yml ADDED
@@ -0,0 +1,31 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2021-12-22 14:13:29 UTC using RuboCop version 1.23.0.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 1
10
+ # Configuration parameters: Include.
11
+ # Include: **/*.gemspec
12
+ Gemspec/RequiredRubyVersion:
13
+ Exclude:
14
+ - 'ipynbdiff.gemspec'
15
+
16
+ AllCops:
17
+ NewCops: enable
18
+
19
+ Style/StringConcatenation:
20
+ Enabled: false
21
+
22
+ # Offense count: 6
23
+ # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
24
+ # IgnoredMethods: refine
25
+ Metrics/BlockLength:
26
+ Enabled: false
27
+
28
+ # Offense count: 3
29
+ # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
30
+ Metrics/MethodLength:
31
+ Enabled: false
data/Gemfile CHANGED
@@ -1,7 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- source "https://rubygems.org"
3
+ source 'https://rubygems.org'
4
4
 
5
5
  gem 'diffy', '3.3.0'
6
6
  gem 'json', '2.5.1'
7
7
  gem 'rspec', '3.10.0'
8
+ gem 'rspec-parameterized', '0.5.0'
data/Gemfile.lock CHANGED
@@ -1,9 +1,18 @@
1
1
  GEM
2
2
  remote: https://rubygems.org/
3
3
  specs:
4
+ ast (2.4.2)
5
+ binding_ninja (0.2.3)
6
+ coderay (1.1.3)
4
7
  diff-lcs (1.4.4)
5
8
  diffy (3.3.0)
6
9
  json (2.5.1)
10
+ parser (3.0.2.0)
11
+ ast (~> 2.4.1)
12
+ proc_to_ast (0.1.0)
13
+ coderay
14
+ parser
15
+ unparser
7
16
  rspec (3.10.0)
8
17
  rspec-core (~> 3.10.0)
9
18
  rspec-expectations (~> 3.10.0)
@@ -16,15 +25,26 @@ GEM
16
25
  rspec-mocks (3.10.2)
17
26
  diff-lcs (>= 1.2.0, < 2.0)
18
27
  rspec-support (~> 3.10.0)
28
+ rspec-parameterized (0.5.0)
29
+ binding_ninja (>= 0.2.3)
30
+ parser
31
+ proc_to_ast
32
+ rspec (>= 2.13, < 4)
33
+ unparser
19
34
  rspec-support (3.10.2)
35
+ unparser (0.6.0)
36
+ diff-lcs (~> 1.3)
37
+ parser (>= 3.0.0)
20
38
 
21
39
  PLATFORMS
40
+ ruby
22
41
  x86_64-darwin-20
23
42
 
24
43
  DEPENDENCIES
25
44
  diffy (= 3.3.0)
26
45
  json (= 2.5.1)
27
46
  rspec (= 3.10.0)
47
+ rspec-parameterized (= 0.5.0)
28
48
 
29
49
  BUNDLED WITH
30
- 2.2.29
50
+ 2.2.30
data/README.md CHANGED
@@ -7,10 +7,10 @@ that the entire file is readable on the diff.
7
7
 
8
8
  The results are diffs that are much easier to read:
9
9
 
10
- | Diff | IpynbDiff - HTML | IpynbDiff - Percent |
11
- | ------ | ------ | ------ |
12
- | [Here](example/diff.txt) | [Here](example/ipynbdiff.txt) | [Here](example/ipynbdiff_percent.txt) |
13
- | ![](example/img/diff.png) | ![](example/img/ipynbdiff_html.png) | ![](example/img/ipynbdiff_percent.png) |
10
+ | Diff | IpynbDiff |
11
+ | ------ | ------ |
12
+ | [Here](example/diff.txt) | [Here](example/ipynbdiff_percent.txt) |
13
+ | ![](example/img/diff.png) | ![](example/img/ipynbdiff_percent.png) |
14
14
 
15
15
 
16
16
  This started as a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff),
@@ -52,7 +52,6 @@ Options:
52
52
 
53
53
  ```ruby
54
54
  @default_transform_options = {
55
- include_metadata: false, # Whether to include or not the notebook metadata (kernel, language, etc)
56
- cell_decorator: :html # :html is useful to add styling with css, :percent is better for text format
55
+ include_frontmatter: false, # Whether to include the notebook metadata (kernel, language, etc.)
57
56
  }
58
57
  ```
data/ipynbdiff.gemspec CHANGED
@@ -1,29 +1,35 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative 'lib/version'
4
+
3
5
  Gem::Specification.new do |s|
4
6
  s.name = 'ipynbdiff'
5
- s.version = ENV['LIB_VERSION']
7
+ s.version = IpynbDiff::VERSION
6
8
  s.summary = 'Human Readable diffs for Jupyter Notebooks'
7
9
  s.description = 'Better diff for Jupyter Notebooks by first preprocessing them and removing clutter'
8
10
  s.authors = ['Eduardo Bonet']
9
11
  s.email = 'ebonet@gitlab.com'
10
12
  # Specify which files should be added to the gem when it is released.
11
13
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
12
- s.files = Dir.chdir(File.expand_path('..', __FILE__)) do
14
+ s.files = Dir.chdir(File.expand_path(__dir__)) do
13
15
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(spec|example)/}) }
14
16
  end
15
- s.homepage =
17
+ s.homepage =
16
18
  'https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff'
17
19
  s.license = 'MIT'
18
20
 
19
21
  s.require_paths = ['lib']
20
22
 
21
- s.add_runtime_dependency 'diffy', '3.3.0'
22
- s.add_runtime_dependency 'json', '2.5.1'
23
+ s.add_runtime_dependency 'diffy', '~> 3.3'
24
+ s.add_runtime_dependency 'json', '~> 2.5', '>= 2.5.1'
23
25
 
24
26
  s.add_development_dependency 'bundler', '~> 2.2'
25
27
  s.add_development_dependency 'guard-rspec'
26
28
  s.add_development_dependency 'pry'
27
29
  s.add_development_dependency 'rake'
28
30
  s.add_development_dependency 'rspec'
31
+ s.add_development_dependency 'rspec-parametized'
32
+ s.metadata = {
33
+ 'rubygems_mfa_required' => 'true'
34
+ }
29
35
  end
data/lib/diff.rb ADDED
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Custom differ for Jupyter Notebooks
4
+ module IpynbDiff
5
+ require 'delegate'
6
+
7
+ # The result of a diff object
8
+ class Diff < SimpleDelegator
9
+ require 'diffy'
10
+
11
+ attr_reader :from, :to
12
+
13
+ def initialize(from, to, diffy_opts)
14
+ super(Diffy::Diff.new(from.as_text, to.as_text, **diffy_opts))
15
+
16
+ @from = from
17
+ @to = to
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,151 @@
1
+ # frozen_string_literal: true
2
+
3
+ module IpynbDiff
4
+ class InvalidTokenError < StandardError
5
+ end
6
+
7
+ # Creates a symbol map for an ipynb file (JSON format)
8
+ class IpynbSymbolMap
9
+ class << self
10
+ def parse(notebook)
11
+ IpynbSymbolMap.new(notebook).parse('')
12
+ end
13
+ end
14
+
15
+ attr_reader :current_line, :char_idx, :results
16
+
17
+ WHITESPACE_CHARS = ["\t", "\r", ' ', "\n"].freeze
18
+
19
+ VALUE_STOPPERS = [',', '[', ']', '{', '}', *WHITESPACE_CHARS].freeze
20
+
21
+ def initialize(notebook)
22
+ @chars = notebook.chars
23
+ @current_line = 0
24
+ @char_idx = 0
25
+ @results = {}
26
+ end
27
+
28
+ def parse(prefix = '.')
29
+ skip_whitespaces
30
+
31
+ if (c = current_char) == '"'
32
+ parse_string
33
+ elsif c == '['
34
+ parse_array(prefix)
35
+ elsif c == '{'
36
+ parse_object(prefix)
37
+ else
38
+ parse_value
39
+ end
40
+
41
+ results
42
+ end
43
+
44
+ def parse_array(prefix)
45
+ # [1, 2, {"some": "object"}, [1]]
46
+
47
+ i = 0
48
+
49
+ current_should_be '['
50
+
51
+ loop do
52
+ break if skip_beginning(']')
53
+
54
+ new_prefix = "#{prefix}.#{i}"
55
+
56
+ add_result(new_prefix, current_line)
57
+
58
+ parse(new_prefix)
59
+
60
+ i += 1
61
+ end
62
+ end
63
+
64
+ def parse_object(prefix)
65
+ # {"name":"value", "another_name": [1, 2, 3]}
66
+
67
+ current_should_be '{'
68
+
69
+ loop do
70
+ break if skip_beginning('}')
71
+
72
+ prop_name = parse_string
73
+
74
+ new_prefix = "#{prefix}.#{prop_name}"
75
+
76
+ add_result(new_prefix, current_line)
77
+
78
+ next_and_skip_whitespaces
79
+
80
+ current_should_be ':'
81
+
82
+ next_and_skip_whitespaces
83
+
84
+ parse(new_prefix)
85
+ end
86
+ end
87
+
88
+ def parse_string
89
+ value = ''
90
+ prev_char = nil
91
+
92
+ current_should_be '"'
93
+
94
+ loop do
95
+ increment_char_index
96
+ break if (c = current_char) == '"' && prev_char != '\\'
97
+
98
+ value += (prev_char = c)
99
+ end
100
+
101
+ value
102
+ end
103
+
104
+ def add_result(key, line_number)
105
+ @results[key] = line_number
106
+ end
107
+
108
+ def parse_value
109
+ increment_char_index until VALUE_STOPPERS.include?(current_char)
110
+ end
111
+
112
+ def skip_whitespaces
113
+ while WHITESPACE_CHARS.include?(current_char)
114
+ check_for_new_line
115
+ increment_char_index
116
+ end
117
+ end
118
+
119
+ def increment_char_index
120
+ @char_idx += 1
121
+ end
122
+
123
+ def next_and_skip_whitespaces
124
+ increment_char_index
125
+ skip_whitespaces
126
+ end
127
+
128
+ def current_char
129
+ @chars[@char_idx]
130
+ end
131
+
132
+ def current_should_be(another_char)
133
+ raise InvalidTokenError unless current_char == another_char
134
+ end
135
+
136
+ def check_for_new_line
137
+ @current_line += 1 if current_char == "\n"
138
+ end
139
+
140
+ def skip_beginning(closing_char)
141
+
142
+ check_for_new_line
143
+
144
+ next_and_skip_whitespaces
145
+
146
+ return true if current_char == closing_char
147
+
148
+ next_and_skip_whitespaces if current_char == ','
149
+ end
150
+ end
151
+ end
data/lib/ipynbdiff.rb CHANGED
@@ -3,55 +3,20 @@
3
3
  # Human Readable Jupyter Diffs
4
4
  module IpynbDiff
5
5
  require 'transformer'
6
- require 'diffy'
6
+ require 'diff'
7
7
 
8
- @default_transform_options = {
9
- include_metadata: false,
10
- cell_decorator: :html
11
- }
8
+ def self.diff(from, to, raise_if_invalid_nb: false, include_frontmatter: false, diffy_opts: {})
9
+ transformer = Transformer.new(include_frontmatter: include_frontmatter)
12
10
 
13
- @default_diff_options = {
14
- preprocess_input: true,
15
- write_output_to: nil,
16
- format: :text,
17
- sources_are_files: false,
18
- raise_if_invalid_notebook: false,
19
- transform_options: @default_transform_options,
20
- diff_opts: {
21
- include_diff_info: false
22
- }
23
- }.freeze
24
-
25
- def self.prepare_input(to_prepare, options)
26
- return '' unless to_prepare
27
-
28
- prep = to_prepare
29
- prep = File.read(prep) if options[:sources_are_files]
30
- prep = transform(prep, raise_errors: true, options: options[:transform_options]) if options[:preprocess_input]
31
- prep
32
- end
33
-
34
- def self.diff(
35
- from_notebook,
36
- to_notebook,
37
- options = @default_diff_options
38
- )
39
- options = @default_diff_options.merge(options)
40
-
41
- from = prepare_input(from_notebook, options)
42
- to = prepare_input(to_notebook, options)
43
-
44
- d = Diffy::Diff.new(from, to, **options[:diff_opts]).to_s(options[:format])
45
- File.write(options[:write_output_to], d) if options[:write_output_to]
46
- d
11
+ Diff.new(transformer.transform(from), transformer.transform(to), diffy_opts)
47
12
  rescue InvalidNotebookError
48
- raise if options[:raise_if_invalid_notebook]
13
+ raise if raise_if_invalid_nb
49
14
  end
50
15
 
51
- def self.transform(notebook, raise_errors: false, options: @default_transform_options)
52
- options = @default_transform_options.merge(options)
16
+ def self.transform(notebook, raise_errors: false, include_frontmatter: true)
17
+ return unless notebook
53
18
 
54
- Transformer.new(**options).transform(notebook)
19
+ Transformer.new(include_frontmatter: include_frontmatter).transform(notebook).as_text
55
20
  rescue InvalidNotebookError
56
21
  raise if raise_errors
57
22
  end
@@ -1,61 +1,76 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module IpynbDiff
4
-
5
4
  # Transforms Jupyter output data into markdown
6
5
  class OutputTransformer
6
+ require 'symbolized_markdown_helper'
7
+ include SymbolizedMarkdownHelper
7
8
 
8
9
  ORDERED_KEYS = {
9
10
  'execute_result' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex text/plain],
10
11
  'display_data' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex]
11
12
  }.freeze
12
13
 
13
- def transform(output)
14
- case (output_type = output['output_type'])
15
- when 'error'
16
- transform_error(output['traceback'])
17
- when 'execute_result', 'display_data'
18
- transform_non_error(ORDERED_KEYS[output_type], output['data'])
19
- end
14
+ def transform(output, symbol)
15
+ transformed = case (output_type = output['output_type'])
16
+ when 'error'
17
+ transform_error(output['traceback'], symbol / 'traceback')
18
+ when 'execute_result', 'display_data'
19
+ transform_non_error(ORDERED_KEYS[output_type], output['data'], symbol / 'data')
20
+ end
21
+
22
+ decorate_output(transformed, output, symbol) if transformed
20
23
  end
21
24
 
22
- def transform_error(traceback)
23
- traceback.map do |t|
24
- t.split("\n").map do |line|
25
- line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip << "\n"
25
+ def decorate_output(output_rows, output, symbol)
26
+ [
27
+ _,
28
+ symbol, %(%%%% Output: #{output['output_type']}),
29
+ _,
30
+ *output_rows
31
+ ]
32
+ end
33
+
34
+ def transform_error(traceback, symbol)
35
+ traceback.map.with_index do |t, idx|
36
+ t.split("\n").map do |l|
37
+ [symbol / idx, l.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip]
26
38
  end
27
39
  end
28
40
  end
29
41
 
30
- def transform_non_error(accepted_keys, elements)
31
- accepted_keys.map do |key|
32
- transform_element(key, elements[key]) if elements.key?(key)
33
- end.flatten
42
+ def transform_non_error(accepted_keys, elements, symbol)
43
+ accepted_keys.filter { |key| elements.key?(key) }.map do |key|
44
+ transform_element(key, elements[key], symbol)
45
+ end
34
46
  end
35
47
 
36
- def transform_element(output_type, output_element)
48
+ def transform_element(output_type, output_element, symbol_prefix)
49
+ new_symbol = symbol_prefix / output_type
37
50
  case output_type
38
51
  when 'image/png', 'image/jpeg'
39
- transform_image(output_type, output_element)
52
+ transform_image(output_type, output_element, new_symbol)
40
53
  when 'image/svg+xml'
41
- transform_svg(output_element)
54
+ transform_svg(output_element, new_symbol)
42
55
  when 'text/markdown', 'text/latex', 'text/plain'
43
- transform_text(output_element)
56
+ transform_text(output_element, new_symbol)
44
57
  end
45
58
  end
46
59
 
47
- def transform_image(image_type, image_content)
48
- [" ![](data:#{image_type};base64,#{image_content.gsub("\n", '')})", "\n"]
60
+ def transform_image(image_type, image_content, symbol)
61
+ [symbol, " ![](data:#{image_type};base64,#{image_content.gsub("\n", '')})"]
49
62
  end
50
63
 
51
- def transform_svg(image_content)
52
- single_line = image_content.map(&:strip).join('').gsub(/\s+/, ' ')
64
+ def transform_svg(image_content, symbol)
65
+ lines = image_content.is_a?(Array) ? image_content : [image_content]
66
+
67
+ single_line = lines.map(&:strip).join.gsub(/\s+/, ' ')
53
68
 
54
- [" ![](data:image/svg+xml;utf8,#{single_line})", "\n"]
69
+ [symbol, " ![](data:image/svg+xml;utf8,#{single_line})"]
55
70
  end
56
71
 
57
- def transform_text(text_content)
58
- text_content.map { |line| " #{line}" }.append("\n")
72
+ def transform_text(text_content, symbol)
73
+ symbolize_array(symbol, text_content) { |l| " #{l.rstrip}" }
59
74
  end
60
75
  end
61
76
  end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ module IpynbDiff
4
+ # Helper functions
5
+ module SymbolizedMarkdownHelper
6
+
7
+ def _(content = '')
8
+ [nil, content]
9
+ end
10
+
11
+ def array_if_not_array(thing)
12
+ thing.is_a?(Array) ? thing : [thing]
13
+ end
14
+
15
+ def symbolize_array(symbol, content, &block)
16
+ if content.is_a?(Array)
17
+ content.map.with_index { |l, idx| [symbol / idx, block.call(l)] }
18
+ else
19
+ [symbol, content]
20
+ end
21
+ end
22
+ end
23
+
24
+ # Simple wrapper for a string
25
+ class JsonSymbol < String
26
+ def /(other)
27
+ JsonSymbol.new((other.is_a?(Array) ? [self, *other] : [self, other]).join('.'))
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module IpynbDiff
4
+ # Notebook that was transformed into md, including location of source cells
5
+ class TransformedNotebook
6
+ attr_reader :blocks
7
+
8
+ def as_text
9
+ @blocks.map { |b| b[:content] }.join("\n")
10
+ end
11
+
12
+ private
13
+
14
+ def initialize(lines = [], symbols = [], symbol_map = {})
15
+ @blocks = lines.zip(symbols).map do |line, symbol|
16
+ { content: line, source_symbol: symbol, source_line: symbol && symbol_map[symbol] }
17
+ end
18
+ end
19
+ end
20
+ end
data/lib/transformer.rb CHANGED
@@ -9,21 +9,22 @@ module IpynbDiff
9
9
  require 'json'
10
10
  require 'yaml'
11
11
  require 'output_transformer'
12
+ require 'symbolized_markdown_helper'
13
+ require 'ipynb_symbol_map'
14
+ require 'transformed_notebook'
15
+ include SymbolizedMarkdownHelper
12
16
 
13
- @cell_decorator = :html
14
- @include_metadata = true
17
+ @include_frontmatter = true
15
18
 
16
-
17
- def initialize(include_metadata: true, cell_decorator: :html)
18
- @include_metadata = include_metadata
19
- @cell_decorator = cell_decorator
19
+ def initialize(include_frontmatter: true)
20
+ @include_frontmatter = include_frontmatter
20
21
  @output_transformer = OutputTransformer.new
21
22
  end
22
23
 
23
24
  def validate_notebook(notebook)
24
25
  notebook_json = JSON.parse(notebook)
25
26
 
26
- return notebook_json if notebook_json.key?('cells') && notebook_json.key?('metadata')
27
+ return notebook_json if notebook_json.key?('cells')
27
28
 
28
29
  raise InvalidNotebookError
29
30
  rescue JSON::ParserError
@@ -31,70 +32,74 @@ module IpynbDiff
31
32
  end
32
33
 
33
34
  def transform(notebook)
34
- notebook_json = validate_notebook(notebook)
35
- transformed_blocks = notebook_json['cells'].map do |cell|
36
- decorate_cell(transform_cell(cell, notebook_json), cell)
37
- end
35
+ return TransformedNotebook.new unless notebook
38
36
 
39
- transformed_blocks.prepend(transform_metadata(notebook_json)) if @include_metadata
40
- transformed_blocks.join("\n")
41
- end
37
+ notebook_json = validate_notebook(notebook)
38
+ transformed = transform_document(notebook_json)
39
+ symbol_map = IpynbSymbolMap.parse(notebook)
42
40
 
43
- def decorate_cell(rows, cell)
44
- tags = cell['metadata']&.fetch('tags', [])
45
- type = cell['cell_type'] || 'raw'
41
+ symbols, lines = if transformed && !transformed.empty?
42
+ transformed.partition.each_with_index { |_el, i| i.even? }
43
+ else
44
+ [[], []]
45
+ end
46
46
 
47
- case @cell_decorator
48
- when :html
49
- rows.prepend(%(<div class="cell #{type}" data-id="#{cell['id']}" data-tags="#{tags&.join(' ')}">\n\n))
50
- .append("\n</div>\n")
51
- when :percent
52
- rows.prepend(%(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}\n\n))
53
- else
54
- rows
55
- end.join('')
47
+ TransformedNotebook.new(lines, symbols, symbol_map)
56
48
  end
57
49
 
58
- def transform_cell(cell, notebook)
59
- cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook) : transform_text_cell(cell)
60
- end
50
+ def transform_document(notebook)
51
+ symbol = JsonSymbol.new('.cells')
61
52
 
62
- def decorate_output(output_rows, output)
63
- if @cell_decorator == :html
64
- output_rows.prepend(%(\n<div class="output #{output['output_type']}">\n\n)).append("\n</div>\n")
65
- else
66
- output_rows.prepend(%(\n%%%% Output: #{output['output_type']}\n\n))
53
+ transformed_blocks = notebook['cells'].map.with_index do |cell, idx|
54
+ decorate_cell(transform_cell(cell, notebook, symbol / idx), cell, symbol / idx)
67
55
  end
56
+
57
+ transformed_blocks.prepend(transform_metadata(notebook)) if @include_frontmatter
58
+ transformed_blocks.flatten
68
59
  end
69
60
 
70
- def transform_code_cell(cell, notebook)
61
+ def decorate_cell(rows, cell, symbol)
62
+ tags = cell['metadata']&.fetch('tags', [])
63
+ type = cell['cell_type'] || 'raw'
64
+
71
65
  [
72
- %(``` #{notebook['metadata']['kernelspec']['language']}\n),
73
- *cell['source'],
74
- "\n```\n",
75
- *cell['outputs'].map { |output| transform_output(output) }
66
+ symbol, %(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}),
67
+ _,
68
+ *rows,
69
+ _
76
70
  ]
77
71
  end
78
72
 
79
- def transform_output(output)
80
- transformed = @output_transformer.transform(output)
73
+ def transform_cell(cell, notebook, symbol)
74
+ cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook, symbol) : transform_text_cell(cell, symbol)
75
+ end
81
76
 
82
- decorate_output(transformed, output).join('') if transformed
77
+ def transform_code_cell(cell, notebook, symbol)
78
+ [
79
+ symbol / 'source', %(``` #{notebook.dig('metadata', 'kernelspec', 'language') || ''}),
80
+ symbolize_array(symbol / 'source', cell['source'], &:rstrip),
81
+ _('```'),
82
+ cell['outputs'].map.with_index do |output, idx|
83
+ @output_transformer.transform(output, symbol / ['outputs', idx])
84
+ end
85
+ ]
83
86
  end
84
87
 
85
- def transform_text_cell(cell)
86
- cell['source'].append("\n")
88
+ def transform_text_cell(cell, symbol)
89
+ symbolize_array(symbol / 'source', cell['source'], &:rstrip)
87
90
  end
88
91
 
89
92
  def transform_metadata(notebook_json)
90
- {
93
+ as_yaml = {
91
94
  'jupyter' => {
92
95
  'kernelspec' => notebook_json['metadata']['kernelspec'],
93
96
  'language_info' => notebook_json['metadata']['language_info'],
94
97
  'nbformat' => notebook_json['nbformat'],
95
98
  'nbformat_minor' => notebook_json['nbformat_minor']
96
99
  }
97
- }.to_yaml + "---\n"
100
+ }.to_yaml
101
+
102
+ as_yaml.split("\n").map { |l| _(l) }.append(_('---'), _)
98
103
  end
99
104
  end
100
105
  end
data/lib/version.rb ADDED
@@ -0,0 +1,5 @@
1
+ # lib/version.rb
2
+
3
+ module IpynbDiff
4
+ VERSION = "0.4.1"
5
+ end
metadata CHANGED
@@ -1,41 +1,47 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ipynbdiff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.7
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo Bonet
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-11-04 00:00:00.000000000 Z
11
+ date: 2022-01-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: diffy
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '='
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 3.3.0
19
+ version: '3.3'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '='
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 3.3.0
26
+ version: '3.3'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: json
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '='
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '2.5'
34
+ - - ">="
32
35
  - !ruby/object:Gem::Version
33
36
  version: 2.5.1
34
37
  type: :runtime
35
38
  prerelease: false
36
39
  version_requirements: !ruby/object:Gem::Requirement
37
40
  requirements:
38
- - - '='
41
+ - - "~>"
42
+ - !ruby/object:Gem::Version
43
+ version: '2.5'
44
+ - - ">="
39
45
  - !ruby/object:Gem::Version
40
46
  version: 2.5.1
41
47
  - !ruby/object:Gem::Dependency
@@ -108,6 +114,20 @@ dependencies:
108
114
  - - ">="
109
115
  - !ruby/object:Gem::Version
110
116
  version: '0'
117
+ - !ruby/object:Gem::Dependency
118
+ name: rspec-parametized
119
+ requirement: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ version: '0'
124
+ type: :development
125
+ prerelease: false
126
+ version_requirements: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - ">="
129
+ - !ruby/object:Gem::Version
130
+ version: '0'
111
131
  description: Better diff for Jupyter Notebooks by first preprocessing them and removing
112
132
  clutter
113
133
  email: ebonet@gitlab.com
@@ -115,19 +135,28 @@ executables: []
115
135
  extensions: []
116
136
  extra_rdoc_files: []
117
137
  files:
138
+ - ".VERSION.TMPL"
118
139
  - ".gitignore"
119
140
  - ".gitlab-ci.yml"
141
+ - ".rubocop.yml"
142
+ - ".rubocop_todo.yml"
120
143
  - Gemfile
121
144
  - Gemfile.lock
122
145
  - README.md
123
146
  - ipynbdiff.gemspec
147
+ - lib/diff.rb
148
+ - lib/ipynb_symbol_map.rb
124
149
  - lib/ipynbdiff.rb
125
150
  - lib/output_transformer.rb
151
+ - lib/symbolized_markdown_helper.rb
152
+ - lib/transformed_notebook.rb
126
153
  - lib/transformer.rb
154
+ - lib/version.rb
127
155
  homepage: https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff
128
156
  licenses:
129
157
  - MIT
130
- metadata: {}
158
+ metadata:
159
+ rubygems_mfa_required: 'true'
131
160
  post_install_message:
132
161
  rdoc_options: []
133
162
  require_paths: