ipynbdiff 0.3.7 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 24615c877db39efeb4b4d0effe014db5a7734f6be88385aaf73bc00622075ca8
4
- data.tar.gz: 2aca335aa15c5e413eaa23d197f3f9a0f73d6415adbafcb419cf7764eb983e89
3
+ metadata.gz: 97969c0adb99db01eecef0831de1cbdb6eb1d2bfc58233195c76d475c8c4a97f
4
+ data.tar.gz: e04aa8f6b704f85d539eb6b59021f67ad3ca1c032a8fd3051115545828ad2c67
5
5
  SHA512:
6
- metadata.gz: 280688c5f5f722cea1868963dcf49eefdb3c35374fad96522d7f8b8a7c77b902d11db1b2b5dd1cde3370065159cee4f1c1f252b1a15f585c589abdc9ede02e1f
7
- data.tar.gz: 50de3ac58248b4af0e67d2e68a476a3c8395753a47638c7ca2015d7c909d55031f605f5334be54d909fd40e5728912623909b7fc3f73f8544b0828dd1e9e6a1a
6
+ metadata.gz: 9699f6249f6c47df87d0854069da21a484a91ef5974cd58570660e48bee7cadda633c7497396875e21d83221b00eaa8b8eb7d1e502340b0dec78718e7735d8b0
7
+ data.tar.gz: 63a1153092296010189a76dc3c9eadb6cf421603b6199189a9d8681048820fc607123aa41a65c08169a8497e7a28795a0325f790fd792105374268d0996e5e0a
data/.VERSION.TMPL ADDED
@@ -0,0 +1,5 @@
1
+ # lib/version.rb
2
+
3
+ module IpynbDiff
4
+ VERSION = "GEM_VERSION"
5
+ end
data/.gitlab-ci.yml CHANGED
@@ -1,6 +1,42 @@
1
+ image: ruby:2.7
2
+
3
+ stages:
4
+ - test
5
+ - build
6
+ - rubygems
7
+
1
8
  specs:
2
9
  stage: test
3
- image: ruby:2.7
4
10
  script:
5
11
  - bundle install
6
12
  - bundle exec rspec
13
+
14
+ build-gem:
15
+ stage: build
16
+ script:
17
+ - bundle install
18
+ - cat .VERSION.TMPL | sed s/GEM_VERSION/0.0.0/ > lib/version.rb
19
+ - gem build ipynbdiff.gemspec
20
+ artifacts:
21
+ paths:
22
+ - ipynbdiff-0.0.0.gem
23
+ needs:
24
+ - specs
25
+
26
+ deploy-gem:
27
+ stage: rubygems
28
+ script:
29
+ - bundle install
30
+ - cat .VERSION.TMPL | sed s/GEM_VERSION/$CI_COMMIT_TAG/ > lib/version.rb
31
+ - gem build ipynbdiff.gemspec
32
+ - gem push ipynbdiff-$CI_COMMIT_TAG.gem
33
+ only:
34
+ - tags
35
+ except:
36
+ - branches
37
+ needs:
38
+ - build-gem
39
+ when: manual
40
+
41
+
42
+
data/.rubocop.yml ADDED
@@ -0,0 +1 @@
1
+ inherit_from: .rubocop_todo.yml
data/.rubocop_todo.yml ADDED
@@ -0,0 +1,31 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2021-12-22 14:13:29 UTC using RuboCop version 1.23.0.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 1
10
+ # Configuration parameters: Include.
11
+ # Include: **/*.gemspec
12
+ Gemspec/RequiredRubyVersion:
13
+ Exclude:
14
+ - 'ipynbdiff.gemspec'
15
+
16
+ AllCops:
17
+ NewCops: enable
18
+
19
+ Style/StringConcatenation:
20
+ Enabled: false
21
+
22
+ # Offense count: 6
23
+ # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
24
+ # IgnoredMethods: refine
25
+ Metrics/BlockLength:
26
+ Enabled: false
27
+
28
+ # Offense count: 3
29
+ # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
30
+ Metrics/MethodLength:
31
+ Enabled: false
data/Gemfile CHANGED
@@ -1,7 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- source "https://rubygems.org"
3
+ source 'https://rubygems.org'
4
4
 
5
5
  gem 'diffy', '3.3.0'
6
6
  gem 'json', '2.5.1'
7
7
  gem 'rspec', '3.10.0'
8
+ gem 'rspec-parameterized', '0.5.0'
data/Gemfile.lock CHANGED
@@ -1,9 +1,18 @@
1
1
  GEM
2
2
  remote: https://rubygems.org/
3
3
  specs:
4
+ ast (2.4.2)
5
+ binding_ninja (0.2.3)
6
+ coderay (1.1.3)
4
7
  diff-lcs (1.4.4)
5
8
  diffy (3.3.0)
6
9
  json (2.5.1)
10
+ parser (3.0.2.0)
11
+ ast (~> 2.4.1)
12
+ proc_to_ast (0.1.0)
13
+ coderay
14
+ parser
15
+ unparser
7
16
  rspec (3.10.0)
8
17
  rspec-core (~> 3.10.0)
9
18
  rspec-expectations (~> 3.10.0)
@@ -16,15 +25,26 @@ GEM
16
25
  rspec-mocks (3.10.2)
17
26
  diff-lcs (>= 1.2.0, < 2.0)
18
27
  rspec-support (~> 3.10.0)
28
+ rspec-parameterized (0.5.0)
29
+ binding_ninja (>= 0.2.3)
30
+ parser
31
+ proc_to_ast
32
+ rspec (>= 2.13, < 4)
33
+ unparser
19
34
  rspec-support (3.10.2)
35
+ unparser (0.6.0)
36
+ diff-lcs (~> 1.3)
37
+ parser (>= 3.0.0)
20
38
 
21
39
  PLATFORMS
40
+ ruby
22
41
  x86_64-darwin-20
23
42
 
24
43
  DEPENDENCIES
25
44
  diffy (= 3.3.0)
26
45
  json (= 2.5.1)
27
46
  rspec (= 3.10.0)
47
+ rspec-parameterized (= 0.5.0)
28
48
 
29
49
  BUNDLED WITH
30
- 2.2.29
50
+ 2.2.30
data/README.md CHANGED
@@ -7,10 +7,10 @@ that the entire file is readable on the diff.
7
7
 
8
8
  The results are diffs that are much easier to read:
9
9
 
10
- | Diff | IpynbDiff - HTML | IpynbDiff - Percent |
11
- | ------ | ------ | ------ |
12
- | [Here](example/diff.txt) | [Here](example/ipynbdiff.txt) | [Here](example/ipynbdiff_percent.txt) |
13
- | ![](example/img/diff.png) | ![](example/img/ipynbdiff_html.png) | ![](example/img/ipynbdiff_percent.png) |
10
+ | Diff | IpynbDiff |
11
+ | ------ | ------ |
12
+ | [Here](example/diff.txt) | [Here](example/ipynbdiff_percent.txt) |
13
+ | ![](example/img/diff.png) | ![](example/img/ipynbdiff_percent.png) |
14
14
 
15
15
 
16
16
  This started as a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff),
@@ -52,7 +52,6 @@ Options:
52
52
 
53
53
  ```ruby
54
54
  @default_transform_options = {
55
- include_metadata: false, # Whether to include or not the notebook metadata (kernel, language, etc)
56
- cell_decorator: :html # :html is useful to add styling with css, :percent is better for text format
55
+ include_frontmatter: false, # Whether or not to include the notebook metadata (kernel, language, etc.)
57
56
  }
58
57
  ```
data/ipynbdiff.gemspec CHANGED
@@ -1,29 +1,35 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative 'lib/version'
4
+
3
5
  Gem::Specification.new do |s|
4
6
  s.name = 'ipynbdiff'
5
- s.version = ENV['LIB_VERSION']
7
+ s.version = IpynbDiff::VERSION
6
8
  s.summary = 'Human Readable diffs for Jupyter Notebooks'
7
9
  s.description = 'Better diff for Jupyter Notebooks by first preprocessing them and removing clutter'
8
10
  s.authors = ['Eduardo Bonet']
9
11
  s.email = 'ebonet@gitlab.com'
10
12
  # Specify which files should be added to the gem when it is released.
11
13
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
12
- s.files = Dir.chdir(File.expand_path('..', __FILE__)) do
14
+ s.files = Dir.chdir(File.expand_path(__dir__)) do
13
15
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(spec|example)/}) }
14
16
  end
15
- s.homepage =
17
+ s.homepage =
16
18
  'https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff'
17
19
  s.license = 'MIT'
18
20
 
19
21
  s.require_paths = ['lib']
20
22
 
21
- s.add_runtime_dependency 'diffy', '3.3.0'
22
- s.add_runtime_dependency 'json', '2.5.1'
23
+ s.add_runtime_dependency 'diffy', '~> 3.3'
24
+ s.add_runtime_dependency 'json', '~> 2.5', '>= 2.5.1'
23
25
 
24
26
  s.add_development_dependency 'bundler', '~> 2.2'
25
27
  s.add_development_dependency 'guard-rspec'
26
28
  s.add_development_dependency 'pry'
27
29
  s.add_development_dependency 'rake'
28
30
  s.add_development_dependency 'rspec'
31
+ s.add_development_dependency 'rspec-parametized'
32
+ s.metadata = {
33
+ 'rubygems_mfa_required' => 'true'
34
+ }
29
35
  end
data/lib/diff.rb ADDED
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Custom differ for Jupyter Notebooks
4
+ module IpynbDiff
5
+ require 'delegate'
6
+
7
+ # The result of a diff object
8
+ class Diff < SimpleDelegator
9
+ require 'diffy'
10
+
11
+ attr_reader :from, :to
12
+
13
+ def initialize(from, to, diffy_opts)
14
+ super(Diffy::Diff.new(from.as_text, to.as_text, **diffy_opts))
15
+
16
+ @from = from
17
+ @to = to
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,151 @@
1
+ # frozen_string_literal: true
2
+
3
+ module IpynbDiff
4
+ class InvalidTokenError < StandardError
5
+ end
6
+
7
+ # Creates a symbol map for an ipynb file (JSON format)
8
+ class IpynbSymbolMap
9
+ class << self
10
+ def parse(notebook)
11
+ IpynbSymbolMap.new(notebook).parse('')
12
+ end
13
+ end
14
+
15
+ attr_reader :current_line, :char_idx, :results
16
+
17
+ WHITESPACE_CHARS = ["\t", "\r", ' ', "\n"].freeze
18
+
19
+ VALUE_STOPPERS = [',', '[', ']', '{', '}', *WHITESPACE_CHARS].freeze
20
+
21
+ def initialize(notebook)
22
+ @chars = notebook.chars
23
+ @current_line = 0
24
+ @char_idx = 0
25
+ @results = {}
26
+ end
27
+
28
+ def parse(prefix = '.')
29
+ skip_whitespaces
30
+
31
+ if (c = current_char) == '"'
32
+ parse_string
33
+ elsif c == '['
34
+ parse_array(prefix)
35
+ elsif c == '{'
36
+ parse_object(prefix)
37
+ else
38
+ parse_value
39
+ end
40
+
41
+ results
42
+ end
43
+
44
+ def parse_array(prefix)
45
+ # [1, 2, {"some": "object"}, [1]]
46
+
47
+ i = 0
48
+
49
+ current_should_be '['
50
+
51
+ loop do
52
+ break if skip_beginning(']')
53
+
54
+ new_prefix = "#{prefix}.#{i}"
55
+
56
+ add_result(new_prefix, current_line)
57
+
58
+ parse(new_prefix)
59
+
60
+ i += 1
61
+ end
62
+ end
63
+
64
+ def parse_object(prefix)
65
+ # {"name":"value", "another_name": [1, 2, 3]}
66
+
67
+ current_should_be '{'
68
+
69
+ loop do
70
+ break if skip_beginning('}')
71
+
72
+ prop_name = parse_string
73
+
74
+ new_prefix = "#{prefix}.#{prop_name}"
75
+
76
+ add_result(new_prefix, current_line)
77
+
78
+ next_and_skip_whitespaces
79
+
80
+ current_should_be ':'
81
+
82
+ next_and_skip_whitespaces
83
+
84
+ parse(new_prefix)
85
+ end
86
+ end
87
+
88
+ def parse_string
89
+ value = ''
90
+ prev_char = nil
91
+
92
+ current_should_be '"'
93
+
94
+ loop do
95
+ increment_char_index
96
+ break if (c = current_char) == '"' && prev_char != '\\'
97
+
98
+ value += (prev_char = c)
99
+ end
100
+
101
+ value
102
+ end
103
+
104
+ def add_result(key, line_number)
105
+ @results[key] = line_number
106
+ end
107
+
108
+ def parse_value
109
+ increment_char_index until VALUE_STOPPERS.include?(current_char)
110
+ end
111
+
112
+ def skip_whitespaces
113
+ while WHITESPACE_CHARS.include?(current_char)
114
+ check_for_new_line
115
+ increment_char_index
116
+ end
117
+ end
118
+
119
+ def increment_char_index
120
+ @char_idx += 1
121
+ end
122
+
123
+ def next_and_skip_whitespaces
124
+ increment_char_index
125
+ skip_whitespaces
126
+ end
127
+
128
+ def current_char
129
+ @chars[@char_idx]
130
+ end
131
+
132
+ def current_should_be(another_char)
133
+ raise InvalidTokenError unless current_char == another_char
134
+ end
135
+
136
+ def check_for_new_line
137
+ @current_line += 1 if current_char == "\n"
138
+ end
139
+
140
+ def skip_beginning(closing_char)
141
+
142
+ check_for_new_line
143
+
144
+ next_and_skip_whitespaces
145
+
146
+ return true if current_char == closing_char
147
+
148
+ next_and_skip_whitespaces if current_char == ','
149
+ end
150
+ end
151
+ end
data/lib/ipynbdiff.rb CHANGED
@@ -3,55 +3,20 @@
3
3
  # Human Readable Jupyter Diffs
4
4
  module IpynbDiff
5
5
  require 'transformer'
6
- require 'diffy'
6
+ require 'diff'
7
7
 
8
- @default_transform_options = {
9
- include_metadata: false,
10
- cell_decorator: :html
11
- }
8
+ def self.diff(from, to, raise_if_invalid_nb: false, include_frontmatter: false, diffy_opts: {})
9
+ transformer = Transformer.new(include_frontmatter: include_frontmatter)
12
10
 
13
- @default_diff_options = {
14
- preprocess_input: true,
15
- write_output_to: nil,
16
- format: :text,
17
- sources_are_files: false,
18
- raise_if_invalid_notebook: false,
19
- transform_options: @default_transform_options,
20
- diff_opts: {
21
- include_diff_info: false
22
- }
23
- }.freeze
24
-
25
- def self.prepare_input(to_prepare, options)
26
- return '' unless to_prepare
27
-
28
- prep = to_prepare
29
- prep = File.read(prep) if options[:sources_are_files]
30
- prep = transform(prep, raise_errors: true, options: options[:transform_options]) if options[:preprocess_input]
31
- prep
32
- end
33
-
34
- def self.diff(
35
- from_notebook,
36
- to_notebook,
37
- options = @default_diff_options
38
- )
39
- options = @default_diff_options.merge(options)
40
-
41
- from = prepare_input(from_notebook, options)
42
- to = prepare_input(to_notebook, options)
43
-
44
- d = Diffy::Diff.new(from, to, **options[:diff_opts]).to_s(options[:format])
45
- File.write(options[:write_output_to], d) if options[:write_output_to]
46
- d
11
+ Diff.new(transformer.transform(from), transformer.transform(to), diffy_opts)
47
12
  rescue InvalidNotebookError
48
- raise if options[:raise_if_invalid_notebook]
13
+ raise if raise_if_invalid_nb
49
14
  end
50
15
 
51
- def self.transform(notebook, raise_errors: false, options: @default_transform_options)
52
- options = @default_transform_options.merge(options)
16
+ def self.transform(notebook, raise_errors: false, include_frontmatter: true)
17
+ return unless notebook
53
18
 
54
- Transformer.new(**options).transform(notebook)
19
+ Transformer.new(include_frontmatter: include_frontmatter).transform(notebook).as_text
55
20
  rescue InvalidNotebookError
56
21
  raise if raise_errors
57
22
  end
@@ -1,61 +1,76 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module IpynbDiff
4
-
5
4
  # Transforms Jupyter output data into markdown
6
5
  class OutputTransformer
6
+ require 'symbolized_markdown_helper'
7
+ include SymbolizedMarkdownHelper
7
8
 
8
9
  ORDERED_KEYS = {
9
10
  'execute_result' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex text/plain],
10
11
  'display_data' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex]
11
12
  }.freeze
12
13
 
13
- def transform(output)
14
- case (output_type = output['output_type'])
15
- when 'error'
16
- transform_error(output['traceback'])
17
- when 'execute_result', 'display_data'
18
- transform_non_error(ORDERED_KEYS[output_type], output['data'])
19
- end
14
+ def transform(output, symbol)
15
+ transformed = case (output_type = output['output_type'])
16
+ when 'error'
17
+ transform_error(output['traceback'], symbol / 'traceback')
18
+ when 'execute_result', 'display_data'
19
+ transform_non_error(ORDERED_KEYS[output_type], output['data'], symbol / 'data')
20
+ end
21
+
22
+ decorate_output(transformed, output, symbol) if transformed
20
23
  end
21
24
 
22
- def transform_error(traceback)
23
- traceback.map do |t|
24
- t.split("\n").map do |line|
25
- line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip << "\n"
25
+ def decorate_output(output_rows, output, symbol)
26
+ [
27
+ _,
28
+ symbol, %(%%%% Output: #{output['output_type']}),
29
+ _,
30
+ *output_rows
31
+ ]
32
+ end
33
+
34
+ def transform_error(traceback, symbol)
35
+ traceback.map.with_index do |t, idx|
36
+ t.split("\n").map do |l|
37
+ [symbol / idx, l.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip]
26
38
  end
27
39
  end
28
40
  end
29
41
 
30
- def transform_non_error(accepted_keys, elements)
31
- accepted_keys.map do |key|
32
- transform_element(key, elements[key]) if elements.key?(key)
33
- end.flatten
42
+ def transform_non_error(accepted_keys, elements, symbol)
43
+ accepted_keys.filter { |key| elements.key?(key) }.map do |key|
44
+ transform_element(key, elements[key], symbol)
45
+ end
34
46
  end
35
47
 
36
- def transform_element(output_type, output_element)
48
+ def transform_element(output_type, output_element, symbol_prefix)
49
+ new_symbol = symbol_prefix / output_type
37
50
  case output_type
38
51
  when 'image/png', 'image/jpeg'
39
- transform_image(output_type, output_element)
52
+ transform_image(output_type, output_element, new_symbol)
40
53
  when 'image/svg+xml'
41
- transform_svg(output_element)
54
+ transform_svg(output_element, new_symbol)
42
55
  when 'text/markdown', 'text/latex', 'text/plain'
43
- transform_text(output_element)
56
+ transform_text(output_element, new_symbol)
44
57
  end
45
58
  end
46
59
 
47
- def transform_image(image_type, image_content)
48
- [" ![](data:#{image_type};base64,#{image_content.gsub("\n", '')})", "\n"]
60
+ def transform_image(image_type, image_content, symbol)
61
+ [symbol, " ![](data:#{image_type};base64,#{image_content.gsub("\n", '')})"]
49
62
  end
50
63
 
51
- def transform_svg(image_content)
52
- single_line = image_content.map(&:strip).join('').gsub(/\s+/, ' ')
64
+ def transform_svg(image_content, symbol)
65
+ lines = image_content.is_a?(Array) ? image_content : [image_content]
66
+
67
+ single_line = lines.map(&:strip).join.gsub(/\s+/, ' ')
53
68
 
54
- [" ![](data:image/svg+xml;utf8,#{single_line})", "\n"]
69
+ [symbol, " ![](data:image/svg+xml;utf8,#{single_line})"]
55
70
  end
56
71
 
57
- def transform_text(text_content)
58
- text_content.map { |line| " #{line}" }.append("\n")
72
+ def transform_text(text_content, symbol)
73
+ symbolize_array(symbol, text_content) { |l| " #{l.rstrip}" }
59
74
  end
60
75
  end
61
76
  end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ module IpynbDiff
4
+ # Helper functions
5
+ module SymbolizedMarkdownHelper
6
+
7
+ def _(content = '')
8
+ [nil, content]
9
+ end
10
+
11
+ def array_if_not_array(thing)
12
+ thing.is_a?(Array) ? thing : [thing]
13
+ end
14
+
15
+ def symbolize_array(symbol, content, &block)
16
+ if content.is_a?(Array)
17
+ content.map.with_index { |l, idx| [symbol / idx, block.call(l)] }
18
+ else
19
+ [symbol, content]
20
+ end
21
+ end
22
+ end
23
+
24
+ # Simple wrapper for a string
25
+ class JsonSymbol < String
26
+ def /(other)
27
+ JsonSymbol.new((other.is_a?(Array) ? [self, *other] : [self, other]).join('.'))
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module IpynbDiff
4
+ # Notebook that was transformed into md, including location of source cells
5
+ class TransformedNotebook
6
+ attr_reader :blocks
7
+
8
+ def as_text
9
+ @blocks.map { |b| b[:content] }.join("\n")
10
+ end
11
+
12
+ private
13
+
14
+ def initialize(lines = [], symbols = [], symbol_map = {})
15
+ @blocks = lines.zip(symbols).map do |line, symbol|
16
+ { content: line, source_symbol: symbol, source_line: symbol && symbol_map[symbol] }
17
+ end
18
+ end
19
+ end
20
+ end
data/lib/transformer.rb CHANGED
@@ -9,21 +9,22 @@ module IpynbDiff
9
9
  require 'json'
10
10
  require 'yaml'
11
11
  require 'output_transformer'
12
+ require 'symbolized_markdown_helper'
13
+ require 'ipynb_symbol_map'
14
+ require 'transformed_notebook'
15
+ include SymbolizedMarkdownHelper
12
16
 
13
- @cell_decorator = :html
14
- @include_metadata = true
17
+ @include_frontmatter = true
15
18
 
16
-
17
- def initialize(include_metadata: true, cell_decorator: :html)
18
- @include_metadata = include_metadata
19
- @cell_decorator = cell_decorator
19
+ def initialize(include_frontmatter: true)
20
+ @include_frontmatter = include_frontmatter
20
21
  @output_transformer = OutputTransformer.new
21
22
  end
22
23
 
23
24
  def validate_notebook(notebook)
24
25
  notebook_json = JSON.parse(notebook)
25
26
 
26
- return notebook_json if notebook_json.key?('cells') && notebook_json.key?('metadata')
27
+ return notebook_json if notebook_json.key?('cells')
27
28
 
28
29
  raise InvalidNotebookError
29
30
  rescue JSON::ParserError
@@ -31,70 +32,74 @@ module IpynbDiff
31
32
  end
32
33
 
33
34
  def transform(notebook)
34
- notebook_json = validate_notebook(notebook)
35
- transformed_blocks = notebook_json['cells'].map do |cell|
36
- decorate_cell(transform_cell(cell, notebook_json), cell)
37
- end
35
+ return TransformedNotebook.new unless notebook
38
36
 
39
- transformed_blocks.prepend(transform_metadata(notebook_json)) if @include_metadata
40
- transformed_blocks.join("\n")
41
- end
37
+ notebook_json = validate_notebook(notebook)
38
+ transformed = transform_document(notebook_json)
39
+ symbol_map = IpynbSymbolMap.parse(notebook)
42
40
 
43
- def decorate_cell(rows, cell)
44
- tags = cell['metadata']&.fetch('tags', [])
45
- type = cell['cell_type'] || 'raw'
41
+ symbols, lines = if transformed && !transformed.empty?
42
+ transformed.partition.each_with_index { |_el, i| i.even? }
43
+ else
44
+ [[], []]
45
+ end
46
46
 
47
- case @cell_decorator
48
- when :html
49
- rows.prepend(%(<div class="cell #{type}" data-id="#{cell['id']}" data-tags="#{tags&.join(' ')}">\n\n))
50
- .append("\n</div>\n")
51
- when :percent
52
- rows.prepend(%(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}\n\n))
53
- else
54
- rows
55
- end.join('')
47
+ TransformedNotebook.new(lines, symbols, symbol_map)
56
48
  end
57
49
 
58
- def transform_cell(cell, notebook)
59
- cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook) : transform_text_cell(cell)
60
- end
50
+ def transform_document(notebook)
51
+ symbol = JsonSymbol.new('.cells')
61
52
 
62
- def decorate_output(output_rows, output)
63
- if @cell_decorator == :html
64
- output_rows.prepend(%(\n<div class="output #{output['output_type']}">\n\n)).append("\n</div>\n")
65
- else
66
- output_rows.prepend(%(\n%%%% Output: #{output['output_type']}\n\n))
53
+ transformed_blocks = notebook['cells'].map.with_index do |cell, idx|
54
+ decorate_cell(transform_cell(cell, notebook, symbol / idx), cell, symbol / idx)
67
55
  end
56
+
57
+ transformed_blocks.prepend(transform_metadata(notebook)) if @include_frontmatter
58
+ transformed_blocks.flatten
68
59
  end
69
60
 
70
- def transform_code_cell(cell, notebook)
61
+ def decorate_cell(rows, cell, symbol)
62
+ tags = cell['metadata']&.fetch('tags', [])
63
+ type = cell['cell_type'] || 'raw'
64
+
71
65
  [
72
- %(``` #{notebook['metadata']['kernelspec']['language']}\n),
73
- *cell['source'],
74
- "\n```\n",
75
- *cell['outputs'].map { |output| transform_output(output) }
66
+ symbol, %(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}),
67
+ _,
68
+ *rows,
69
+ _
76
70
  ]
77
71
  end
78
72
 
79
- def transform_output(output)
80
- transformed = @output_transformer.transform(output)
73
+ def transform_cell(cell, notebook, symbol)
74
+ cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook, symbol) : transform_text_cell(cell, symbol)
75
+ end
81
76
 
82
- decorate_output(transformed, output).join('') if transformed
77
+ def transform_code_cell(cell, notebook, symbol)
78
+ [
79
+ symbol / 'source', %(``` #{notebook.dig('metadata', 'kernelspec', 'language') || ''}),
80
+ symbolize_array(symbol / 'source', cell['source'], &:rstrip),
81
+ _('```'),
82
+ cell['outputs'].map.with_index do |output, idx|
83
+ @output_transformer.transform(output, symbol / ['outputs', idx])
84
+ end
85
+ ]
83
86
  end
84
87
 
85
- def transform_text_cell(cell)
86
- cell['source'].append("\n")
88
+ def transform_text_cell(cell, symbol)
89
+ symbolize_array(symbol / 'source', cell['source'], &:rstrip)
87
90
  end
88
91
 
89
92
  def transform_metadata(notebook_json)
90
- {
93
+ as_yaml = {
91
94
  'jupyter' => {
92
95
  'kernelspec' => notebook_json['metadata']['kernelspec'],
93
96
  'language_info' => notebook_json['metadata']['language_info'],
94
97
  'nbformat' => notebook_json['nbformat'],
95
98
  'nbformat_minor' => notebook_json['nbformat_minor']
96
99
  }
97
- }.to_yaml + "---\n"
100
+ }.to_yaml
101
+
102
+ as_yaml.split("\n").map { |l| _(l) }.append(_('---'), _)
98
103
  end
99
104
  end
100
105
  end
data/lib/version.rb ADDED
@@ -0,0 +1,5 @@
1
+ # lib/version.rb
2
+
3
+ module IpynbDiff
4
+ VERSION = "0.4.1"
5
+ end
metadata CHANGED
@@ -1,41 +1,47 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ipynbdiff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.7
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo Bonet
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-11-04 00:00:00.000000000 Z
11
+ date: 2022-01-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: diffy
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '='
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 3.3.0
19
+ version: '3.3'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '='
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 3.3.0
26
+ version: '3.3'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: json
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '='
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '2.5'
34
+ - - ">="
32
35
  - !ruby/object:Gem::Version
33
36
  version: 2.5.1
34
37
  type: :runtime
35
38
  prerelease: false
36
39
  version_requirements: !ruby/object:Gem::Requirement
37
40
  requirements:
38
- - - '='
41
+ - - "~>"
42
+ - !ruby/object:Gem::Version
43
+ version: '2.5'
44
+ - - ">="
39
45
  - !ruby/object:Gem::Version
40
46
  version: 2.5.1
41
47
  - !ruby/object:Gem::Dependency
@@ -108,6 +114,20 @@ dependencies:
108
114
  - - ">="
109
115
  - !ruby/object:Gem::Version
110
116
  version: '0'
117
+ - !ruby/object:Gem::Dependency
118
+ name: rspec-parametized
119
+ requirement: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ version: '0'
124
+ type: :development
125
+ prerelease: false
126
+ version_requirements: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - ">="
129
+ - !ruby/object:Gem::Version
130
+ version: '0'
111
131
  description: Better diff for Jupyter Notebooks by first preprocessing them and removing
112
132
  clutter
113
133
  email: ebonet@gitlab.com
@@ -115,19 +135,28 @@ executables: []
115
135
  extensions: []
116
136
  extra_rdoc_files: []
117
137
  files:
138
+ - ".VERSION.TMPL"
118
139
  - ".gitignore"
119
140
  - ".gitlab-ci.yml"
141
+ - ".rubocop.yml"
142
+ - ".rubocop_todo.yml"
120
143
  - Gemfile
121
144
  - Gemfile.lock
122
145
  - README.md
123
146
  - ipynbdiff.gemspec
147
+ - lib/diff.rb
148
+ - lib/ipynb_symbol_map.rb
124
149
  - lib/ipynbdiff.rb
125
150
  - lib/output_transformer.rb
151
+ - lib/symbolized_markdown_helper.rb
152
+ - lib/transformed_notebook.rb
126
153
  - lib/transformer.rb
154
+ - lib/version.rb
127
155
  homepage: https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff
128
156
  licenses:
129
157
  - MIT
130
- metadata: {}
158
+ metadata:
159
+ rubygems_mfa_required: 'true'
131
160
  post_install_message:
132
161
  rdoc_options: []
133
162
  require_paths: