ipynbdiff 0.3.8 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fbeaad80969c974720e75336301dc02822fbf183b278d8e5ddd5dd18c65ddde1
4
- data.tar.gz: c186fa7fac873dff429cf3fd24c4a5cb61901670270850fd3d623c363db21182
3
+ metadata.gz: dd4b3e7e96694361ee62a498007e9bda9165b47b2ef9a31b41322cf8cdd8781f
4
+ data.tar.gz: 1680189fc74e0b8e8909d671ce3b77abff51e8ccd0d4304cf088b43a3254a311
5
5
  SHA512:
6
- metadata.gz: 23e6a0c192d671fdcb394334a89d40eef9a58e39d934d1138e0ba37943c928d035d2f471e868b2af3d7f24a02f77d927b1e72d8cd3dd8df1f6ef9616796a8ab0
7
- data.tar.gz: 9632766d4dd4e6e57775d59d7a14d01fb664156bd3c41b1af894481f4bf33f807b54ebad0f664184f606c37a6b8f818326e7238e320ba06135f192753c659f20
6
+ metadata.gz: fd5cf9cad8f9c20db2051a8b66a998a8174d066ca71e8a45c7bb5e1e274478919ed44fee1a38b296b7a5c679df686da96e402c750d51df0a68db7328260181a7
7
+ data.tar.gz: a7ba937ec71caa7a5a5a31cd47ea047f9b2518bd6c7f8a61f120b62deb7d3e6a07dc8ba76e8719b725dab8b6f78cb9b86771273a5a46d9798d744c2bc25d00bf
data/.VERSION.TMPL ADDED
@@ -0,0 +1,5 @@
1
+ # lib/version.rb
2
+
3
+ module IpynbDiff
4
+ VERSION = "GEM_VERSION"
5
+ end
data/.gitlab-ci.yml CHANGED
@@ -1,6 +1,43 @@
1
+ # You can override the included template(s) by including variable overrides
2
+ # SAST customization: https://docs.gitlab.com/ee/user/application_security/sast/#customizing-the-sast-settings
3
+ # Secret Detection customization: https://docs.gitlab.com/ee/user/application_security/secret_detection/#customizing-settings
4
+ # Dependency Scanning customization: https://docs.gitlab.com/ee/user/application_security/dependency_scanning/#customizing-the-dependency-scanning-settings
5
+ # Note that environment variables can be set in several places
6
+ # See https://docs.gitlab.com/ee/ci/variables/#cicd-variable-precedence
7
+ image: ruby:2.7
8
+ stages:
9
+ - test
10
+ - build
11
+ - rubygems
1
12
  specs:
2
13
  stage: test
3
- image: ruby:2.7
4
14
  script:
5
- - bundle install
6
- - bundle exec rspec
15
+ - bundle install
16
+ - bundle exec rspec
17
+ build-gem:
18
+ stage: build
19
+ script:
20
+ - bundle install
21
+ - cat .VERSION.TMPL | sed s/GEM_VERSION/0.0.0/ > lib/version.rb
22
+ - gem build ipynbdiff.gemspec
23
+ artifacts:
24
+ paths:
25
+ - ipynbdiff-0.0.0.gem
26
+ needs:
27
+ - specs
28
+ deploy-gem:
29
+ stage: rubygems
30
+ script:
31
+ - bundle install
32
+ - cat .VERSION.TMPL | sed s/GEM_VERSION/$CI_COMMIT_TAG/ > lib/version.rb
33
+ - gem build ipynbdiff.gemspec
34
+ - gem push ipynbdiff-$CI_COMMIT_TAG.gem
35
+ only:
36
+ - tags
37
+ except:
38
+ - branches
39
+ needs:
40
+ - build-gem
41
+ when: manual
42
+ include:
43
+ - template: Security/Dependency-Scanning.gitlab-ci.yml
data/.rubocop.yml ADDED
@@ -0,0 +1 @@
1
+ inherit_from: .rubocop_todo.yml
data/.rubocop_todo.yml ADDED
@@ -0,0 +1,31 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2021-12-22 14:13:29 UTC using RuboCop version 1.23.0.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 1
10
+ # Configuration parameters: Include.
11
+ # Include: **/*.gemspec
12
+ Gemspec/RequiredRubyVersion:
13
+ Exclude:
14
+ - 'ipynbdiff.gemspec'
15
+
16
+ AllCops:
17
+ NewCops: enable
18
+
19
+ Style/StringConcatenation:
20
+ Enabled: false
21
+
22
+ # Offense count: 6
23
+ # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
24
+ # IgnoredMethods: refine
25
+ Metrics/BlockLength:
26
+ Enabled: false
27
+
28
+ # Offense count: 3
29
+ # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
30
+ Metrics/MethodLength:
31
+ Enabled: false
data/Gemfile CHANGED
@@ -1,7 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- source "https://rubygems.org"
3
+ source 'https://rubygems.org'
4
4
 
5
5
  gem 'diffy', '3.3.0'
6
6
  gem 'json', '2.5.1'
7
7
  gem 'rspec', '3.10.0'
8
+ gem 'rspec-parameterized', '0.5.0'
data/Gemfile.lock CHANGED
@@ -1,9 +1,18 @@
1
1
  GEM
2
2
  remote: https://rubygems.org/
3
3
  specs:
4
+ ast (2.4.2)
5
+ binding_ninja (0.2.3)
6
+ coderay (1.1.3)
4
7
  diff-lcs (1.4.4)
5
8
  diffy (3.3.0)
6
9
  json (2.5.1)
10
+ parser (3.0.2.0)
11
+ ast (~> 2.4.1)
12
+ proc_to_ast (0.1.0)
13
+ coderay
14
+ parser
15
+ unparser
7
16
  rspec (3.10.0)
8
17
  rspec-core (~> 3.10.0)
9
18
  rspec-expectations (~> 3.10.0)
@@ -16,15 +25,26 @@ GEM
16
25
  rspec-mocks (3.10.2)
17
26
  diff-lcs (>= 1.2.0, < 2.0)
18
27
  rspec-support (~> 3.10.0)
28
+ rspec-parameterized (0.5.0)
29
+ binding_ninja (>= 0.2.3)
30
+ parser
31
+ proc_to_ast
32
+ rspec (>= 2.13, < 4)
33
+ unparser
19
34
  rspec-support (3.10.2)
35
+ unparser (0.6.0)
36
+ diff-lcs (~> 1.3)
37
+ parser (>= 3.0.0)
20
38
 
21
39
  PLATFORMS
40
+ ruby
22
41
  x86_64-darwin-20
23
42
 
24
43
  DEPENDENCIES
25
44
  diffy (= 3.3.0)
26
45
  json (= 2.5.1)
27
46
  rspec (= 3.10.0)
47
+ rspec-parameterized (= 0.5.0)
28
48
 
29
49
  BUNDLED WITH
30
- 2.2.29
50
+ 2.2.30
data/README.md CHANGED
@@ -7,10 +7,10 @@ that the entire file is readable on the diff.
7
7
 
8
8
  The results are diffs that are much easier to read:
9
9
 
10
- | Diff | IpynbDiff - HTML | IpynbDiff - Percent |
11
- | ------ | ------ | ------ |
12
- | [Here](example/diff.txt) | [Here](example/ipynbdiff.txt) | [Here](example/ipynbdiff_percent.txt) |
13
- | ![](example/img/diff.png) | ![](example/img/ipynbdiff_html.png) | ![](example/img/ipynbdiff_percent.png) |
10
+ | Diff | IpynbDiff |
11
+ | ------ | ------ |
12
+ | [Here](example/diff.txt) | [Here](example/ipynbdiff_percent.txt) |
13
+ | ![](example/img/diff.png) | ![](example/img/ipynbdiff_percent.png) |
14
14
 
15
15
 
16
16
  This started as a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff),
@@ -52,7 +52,6 @@ Options:
52
52
 
53
53
  ```ruby
54
54
  @default_transform_options = {
55
- include_metadata: false, # Whether to include or not the notebook metadata (kernel, language, etc)
56
- cell_decorator: :html # :html is useful to add styling with css, :percent is better for text format
55
+ include_frontmatter: false, # Whether or not to include the notebook metadata (kernel, language, etc.)
57
56
  }
58
57
  ```
data/ipynbdiff.gemspec CHANGED
@@ -1,29 +1,38 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ lib = File.expand_path('lib/..', __dir__)
4
+ $LOAD_PATH.unshift lib unless $LOAD_PATH.include?(lib)
5
+
6
+ require 'lib/version'
7
+
3
8
  Gem::Specification.new do |s|
4
9
  s.name = 'ipynbdiff'
5
- s.version = ENV['LIB_VERSION']
10
+ s.version = IpynbDiff::VERSION
6
11
  s.summary = 'Human Readable diffs for Jupyter Notebooks'
7
12
  s.description = 'Better diff for Jupyter Notebooks by first preprocessing them and removing clutter'
8
13
  s.authors = ['Eduardo Bonet']
9
14
  s.email = 'ebonet@gitlab.com'
10
15
  # Specify which files should be added to the gem when it is released.
11
16
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
12
- s.files = Dir.chdir(File.expand_path('..', __FILE__)) do
17
+ s.files = Dir.chdir(File.expand_path(__dir__)) do
13
18
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(spec|example)/}) }
14
19
  end
15
- s.homepage =
20
+ s.homepage =
16
21
  'https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff'
17
22
  s.license = 'MIT'
18
23
 
19
24
  s.require_paths = ['lib']
20
25
 
21
- s.add_runtime_dependency 'diffy', '3.3.0'
22
- s.add_runtime_dependency 'json', '2.5.1'
26
+ s.add_runtime_dependency 'diffy', '~> 3.3'
27
+ s.add_runtime_dependency 'json', '~> 2.5', '>= 2.5.1'
23
28
 
24
29
  s.add_development_dependency 'bundler', '~> 2.2'
25
30
  s.add_development_dependency 'guard-rspec'
26
31
  s.add_development_dependency 'pry'
27
32
  s.add_development_dependency 'rake'
28
33
  s.add_development_dependency 'rspec'
34
+ s.add_development_dependency 'rspec-parameterized' # gem name as declared in the Gemfile
35
+ s.metadata = {
36
+ 'rubygems_mfa_required' => 'true'
37
+ }
29
38
  end
data/lib/diff.rb ADDED
@@ -0,0 +1,20 @@
1
# frozen_string_literal: true

require 'delegate'
require 'diffy'

# Custom differ for Jupyter Notebooks
module IpynbDiff
  # Result of diffing two transformed notebooks.
  #
  # Every call that is not defined here is delegated to the wrapped
  # Diffy::Diff, while the two compared notebooks stay reachable through
  # #from and #to.
  class Diff < SimpleDelegator
    attr_reader :from, :to

    # from       - left-hand side of the diff; must respond to #as_text
    # to         - right-hand side of the diff; must respond to #as_text
    # diffy_opts - Hash of options, splatted into Diffy::Diff.new
    def initialize(from, to, diffy_opts)
      text_diff = Diffy::Diff.new(from.as_text, to.as_text, **diffy_opts)
      super(text_diff)

      @from = from
      @to = to
    end
  end
end
@@ -0,0 +1,151 @@
1
# frozen_string_literal: true

module IpynbDiff
  # Raised when the parser meets a character other than the one it expects,
  # or runs out of input in the middle of a string, array or object.
  class InvalidTokenError < StandardError
  end

  # Creates a symbol map for a ipynb file (JSON format)
  #
  # The raw JSON text is walked character by character. For every object
  # property and every array element the zero-based line on which it starts
  # is recorded under its JSON path, e.g. ".cells.0.source.1" => 12.
  #
  # Cursor convention: each parse_* method is entered with the cursor on the
  # first character of the token and returns with the cursor on the LAST
  # character of that token. #skip_beginning relies on this when it steps to
  # the next element.
  class IpynbSymbolMap
    class << self
      # Builds the symbol map for +notebook+ (a String holding JSON text).
      # Returns a Hash of JSON path => zero-based line number.
      def parse(notebook)
        IpynbSymbolMap.new(notebook).parse('')
      end
    end

    attr_reader :current_line, :char_idx, :results

    WHITESPACE_CHARS = ["\t", "\r", ' ', "\n"].freeze

    VALUE_STOPPERS = [',', '[', ']', '{', '}', *WHITESPACE_CHARS].freeze

    def initialize(notebook)
      @chars = notebook.chars
      @current_line = 0
      @char_idx = 0
      @results = {}
    end

    # Parses the JSON value under the cursor, registering nested elements
    # under +prefix+. Returns the results Hash collected so far.
    def parse(prefix = '.')
      skip_whitespaces

      case current_char
      when '"' then parse_string
      when '[' then parse_array(prefix)
      when '{' then parse_object(prefix)
      else parse_value
      end

      results
    end

    # Parses an array such as [1, 2, {"some": "object"}, [1]], registering
    # each element as "<prefix>.<index>".
    def parse_array(prefix)
      i = 0

      current_should_be '['

      loop do
        break if skip_beginning(']')

        new_prefix = "#{prefix}.#{i}"

        add_result(new_prefix, current_line)

        parse(new_prefix)

        i += 1
      end
    end

    # Parses an object such as {"name":"value", "another_name": [1, 2, 3]},
    # registering each property as "<prefix>.<property name>".
    def parse_object(prefix)
      current_should_be '{'

      loop do
        break if skip_beginning('}')

        prop_name = parse_string

        new_prefix = "#{prefix}.#{prop_name}"

        add_result(new_prefix, current_line)

        next_and_skip_whitespaces

        current_should_be ':'

        next_and_skip_whitespaces

        parse(new_prefix)
      end
    end

    # Reads a double-quoted string and returns its raw content (escape
    # sequences are kept as written, backslashes included).
    #
    # Raises InvalidTokenError if the cursor is not on an opening quote or
    # the input ends before the closing quote.
    def parse_string
      # String.new: the buffer is appended in place, so it must not be a
      # frozen literal, and in-place appends keep long strings linear.
      value = String.new
      escaped = false

      current_should_be '"'

      loop do
        increment_char_index
        c = current_char

        raise InvalidTokenError if c.nil?
        break if c == '"' && !escaped

        # A backslash only escapes the next character when it is not itself
        # escaped; this is what lets a string end in an escaped backslash.
        escaped = (c == '\\' && !escaped)
        value << c
      end

      value
    end

    def add_result(key, line_number)
      @results[key] = line_number
    end

    # Consumes a bare scalar (number, true, false, null), leaving the cursor
    # on its last character. Does nothing at end of input or when the cursor
    # already rests on a delimiter.
    def parse_value
      return if value_boundary?(current_char)

      increment_char_index until value_boundary?(@chars[@char_idx + 1])
    end

    def skip_whitespaces
      while WHITESPACE_CHARS.include?(current_char)
        check_for_new_line
        increment_char_index
      end
    end

    def increment_char_index
      @char_idx += 1
    end

    def next_and_skip_whitespaces
      increment_char_index
      skip_whitespaces
    end

    def current_char
      @chars[@char_idx]
    end

    def current_should_be(another_char)
      raise InvalidTokenError unless current_char == another_char
    end

    def check_for_new_line
      @current_line += 1 if current_char == "\n"
    end

    # Steps from the previous token (or the opening bracket) to the start of
    # the next element. Returns true when +closing_char+ is reached instead,
    # false otherwise.
    #
    # Raises InvalidTokenError when the input ends before the closing char.
    def skip_beginning(closing_char)
      # Defensive: count a newline the cursor may be resting on before it is
      # stepped over.
      check_for_new_line

      next_and_skip_whitespaces

      return true if current_char == closing_char

      next_and_skip_whitespaces if current_char == ','

      raise InvalidTokenError if current_char.nil?

      false
    end

    private

    # True when +char+ cannot be part of a bare scalar: end of input or any
    # structural/whitespace character.
    def value_boundary?(char)
      char.nil? || VALUE_STOPPERS.include?(char)
    end
  end
end
data/lib/ipynbdiff.rb CHANGED
@@ -3,55 +3,20 @@
3
3
  # Human Readable Jupyter Diffs
4
4
  module IpynbDiff
5
5
  require 'transformer'
6
- require 'diffy'
6
+ require 'diff'
7
7
 
8
- @default_transform_options = {
9
- include_metadata: false,
10
- cell_decorator: :html
11
- }
8
+ def self.diff(from, to, raise_if_invalid_nb: false, include_frontmatter: false, diffy_opts: {})
9
+ transformer = Transformer.new(include_frontmatter: include_frontmatter)
12
10
 
13
- @default_diff_options = {
14
- preprocess_input: true,
15
- write_output_to: nil,
16
- format: :text,
17
- sources_are_files: false,
18
- raise_if_invalid_notebook: false,
19
- transform_options: @default_transform_options,
20
- diff_opts: {
21
- include_diff_info: false
22
- }
23
- }.freeze
24
-
25
- def self.prepare_input(to_prepare, options)
26
- return '' unless to_prepare
27
-
28
- prep = to_prepare
29
- prep = File.read(prep) if options[:sources_are_files]
30
- prep = transform(prep, raise_errors: true, options: options[:transform_options]) if options[:preprocess_input]
31
- prep
32
- end
33
-
34
- def self.diff(
35
- from_notebook,
36
- to_notebook,
37
- options = @default_diff_options
38
- )
39
- options = @default_diff_options.merge(options)
40
-
41
- from = prepare_input(from_notebook, options)
42
- to = prepare_input(to_notebook, options)
43
-
44
- d = Diffy::Diff.new(from, to, **options[:diff_opts]).to_s(options[:format])
45
- File.write(options[:write_output_to], d) if options[:write_output_to]
46
- d
11
+ Diff.new(transformer.transform(from), transformer.transform(to), diffy_opts)
47
12
  rescue InvalidNotebookError
48
- raise if options[:raise_if_invalid_notebook]
13
+ raise if raise_if_invalid_nb
49
14
  end
50
15
 
51
- def self.transform(notebook, raise_errors: false, options: @default_transform_options)
52
- options = @default_transform_options.merge(options)
16
+ def self.transform(notebook, raise_errors: false, include_frontmatter: true)
17
+ return unless notebook
53
18
 
54
- Transformer.new(**options).transform(notebook)
19
+ Transformer.new(include_frontmatter: include_frontmatter).transform(notebook).as_text
55
20
  rescue InvalidNotebookError
56
21
  raise if raise_errors
57
22
  end
@@ -1,65 +1,79 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module IpynbDiff
4
-
5
4
  # Transforms Jupyter output data into markdown
6
5
  class OutputTransformer
6
+ require 'symbolized_markdown_helper'
7
+ include SymbolizedMarkdownHelper
7
8
 
8
9
  ORDERED_KEYS = {
9
10
  'execute_result' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex text/plain],
10
- 'display_data' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex]
11
+ 'display_data' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex],
12
+ 'stream' => %w[text]
11
13
  }.freeze
12
14
 
13
- def transform(output)
14
- case (output_type = output['output_type'])
15
- when 'error'
16
- transform_error(output['traceback'])
17
- when 'execute_result', 'display_data'
18
- transform_non_error(ORDERED_KEYS[output_type], output['data'])
19
- end
15
+ def transform(output, symbol)
16
+ transformed = case (output_type = output['output_type'])
17
+ when 'error'
18
+ transform_error(output['traceback'], symbol / 'traceback')
19
+ when 'execute_result', 'display_data'
20
+ transform_non_error(ORDERED_KEYS[output_type], output['data'], symbol / 'data')
21
+ when 'stream'
22
+ transform_element('text', output['text'], symbol)
23
+ end
24
+
25
+ transformed ? decorate_output(transformed, output, symbol) : []
20
26
  end
21
27
 
22
- def transform_error(traceback)
23
- traceback.map do |t|
24
- t.split("\n").map do |line|
25
- line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip << "\n"
28
+ def decorate_output(output_rows, output, symbol)
29
+ [
30
+ _,
31
+ _(symbol, %(%%%% Output: #{output['output_type']})),
32
+ _,
33
+ *output_rows
34
+ ]
35
+ end
36
+
37
+ def transform_error(traceback, symbol)
38
+ traceback.map.with_index do |t, idx|
39
+ t.split("\n").map do |l|
40
+ _(symbol / idx, l.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip)
26
41
  end
27
42
  end
28
43
  end
29
44
 
30
- def transform_non_error(accepted_keys, elements)
31
- accepted_keys.map do |key|
32
- transform_element(key, elements[key]) if elements.key?(key)
33
- end.flatten
45
+ def transform_non_error(accepted_keys, elements, symbol)
46
+ accepted_keys.filter { |key| elements.key?(key) }.map do |key|
47
+ transform_element(key, elements[key], symbol)
48
+ end
34
49
  end
35
50
 
36
- def transform_element(output_type, output_element)
51
+ def transform_element(output_type, output_element, symbol_prefix)
52
+ new_symbol = symbol_prefix / output_type
37
53
  case output_type
38
54
  when 'image/png', 'image/jpeg'
39
- transform_image(output_type, output_element)
55
+ transform_image(output_type, output_element, new_symbol)
40
56
  when 'image/svg+xml'
41
- transform_svg(output_element)
42
- when 'text/markdown', 'text/latex', 'text/plain'
43
- transform_text(output_element)
57
+ transform_svg(output_element, new_symbol)
58
+ when 'text/markdown', 'text/latex', 'text/plain', 'text'
59
+ transform_text(output_element, new_symbol)
44
60
  end
45
61
  end
46
62
 
47
- def transform_image(image_type, image_content)
48
- [" ![](data:#{image_type};base64,#{image_content.gsub("\n", '')})", "\n"]
63
+ def transform_image(image_type, image_content, symbol)
64
+ _(symbol, " ![](data:#{image_type};base64,#{image_content.gsub("\n", '')})")
49
65
  end
50
66
 
51
- def transform_svg(image_content)
67
+ def transform_svg(image_content, symbol)
52
68
  lines = image_content.is_a?(Array) ? image_content : [image_content]
53
69
 
54
- single_line = lines.map(&:strip).join('').gsub(/\s+/, ' ')
70
+ single_line = lines.map(&:strip).join.gsub(/\s+/, ' ')
55
71
 
56
- [" ![](data:image/svg+xml;utf8,#{single_line})", "\n"]
72
+ _(symbol, " ![](data:image/svg+xml;utf8,#{single_line})")
57
73
  end
58
74
 
59
- def transform_text(text_content)
60
- lines = text_content.is_a?(Array) ? text_content : [text_content]
61
-
62
- lines.map { |line| " #{line}" }.append("\n")
75
+ def transform_text(text_content, symbol)
76
+ symbolize_array(symbol, text_content) { |l| " #{l.rstrip}" }
63
77
  end
64
78
  end
65
79
  end
@@ -0,0 +1,30 @@
1
# frozen_string_literal: true

module IpynbDiff
  # Helper functions for building "symbolized" markdown rows: Hashes that
  # pair a line of output text (:content) with the JSON path of the notebook
  # element it came from (:symbol).
  module SymbolizedMarkdownHelper
    # Builds one row. Called without arguments it yields an empty line that
    # has no source symbol.
    def _(symbol = nil, content = '')
      { symbol: symbol, content: content }
    end

    # Wraps +thing+ in an Array unless it already is one.
    def array_if_not_array(thing)
      thing.is_a?(Array) ? thing : [thing]
    end

    # Turns +content+ into rows tagged with +symbol+.
    #
    # symbol  - JsonSymbol of the element holding +content+
    # content - Array of lines, a single String, or nil
    # block   - optional per-line formatter (e.g. &:rstrip)
    #
    # Returns an Array of rows when +content+ is an Array (each row's symbol
    # gets the element index appended), otherwise a single row.
    def symbolize_array(symbol, content, &block)
      # Without a block, lines pass through unchanged instead of raising.
      formatter = block || :itself.to_proc

      if content.is_a?(Array)
        content.map.with_index { |l, idx| _(symbol / idx, formatter.call(l)) }
      else
        # A single String gets the same formatting as array lines; any other
        # scalar (such as nil) is passed through untouched.
        _(symbol, content.is_a?(String) ? formatter.call(content) : content)
      end
    end
  end

  # Simple wrapper for a string holding a dot-separated JSON path
  class JsonSymbol < String
    # Appends one segment, or several when +other+ is an Array, and returns
    # the extended path as a new JsonSymbol.
    def /(other)
      JsonSymbol.new((other.is_a?(Array) ? [self, *other] : [self, other]).join('.'))
    end
  end
end
@@ -0,0 +1,20 @@
1
# frozen_string_literal: true

module IpynbDiff
  # Notebook that was transformed into md, including location of source cells
  #
  # Each block is a Hash with :content (one line of output text),
  # :source_symbol (JSON path of the originating element, or nil) and
  # :source_line (that path looked up in the symbol map, or nil).
  class TransformedNotebook
    attr_reader :blocks

    # Returns the content of all blocks joined with newlines.
    def as_text
      contents = @blocks.map { |block| block[:content] }
      contents.join("\n")
    end

    private

    # lines      - Array of rows ({ symbol:, content: })
    # symbol_map - Hash of JSON path => line number in the source notebook
    def initialize(lines = [], symbol_map = {})
      @blocks = lines.map do |line|
        symbol = line[:symbol]
        source_line = symbol && symbol_map[symbol]

        { content: line[:content], source_symbol: symbol, source_line: source_line }
      end
    end
  end
end
data/lib/transformer.rb CHANGED
@@ -9,14 +9,15 @@ module IpynbDiff
9
9
  require 'json'
10
10
  require 'yaml'
11
11
  require 'output_transformer'
12
+ require 'symbolized_markdown_helper'
13
+ require 'ipynb_symbol_map'
14
+ require 'transformed_notebook'
15
+ include SymbolizedMarkdownHelper
12
16
 
13
- @cell_decorator = :html
14
- @include_metadata = true
17
+ @include_frontmatter = true
15
18
 
16
-
17
- def initialize(include_metadata: true, cell_decorator: :html)
18
- @include_metadata = include_metadata
19
- @cell_decorator = cell_decorator
19
+ def initialize(include_frontmatter: true)
20
+ @include_frontmatter = include_frontmatter
20
21
  @output_transformer = OutputTransformer.new
21
22
  end
22
23
 
@@ -31,71 +32,68 @@ module IpynbDiff
31
32
  end
32
33
 
33
34
  def transform(notebook)
35
+ return TransformedNotebook.new unless notebook
36
+
34
37
  notebook_json = validate_notebook(notebook)
35
- transformed_blocks = notebook_json['cells'].map do |cell|
36
- decorate_cell(transform_cell(cell, notebook_json), cell)
37
- end
38
+ transformed = transform_document(notebook_json)
39
+ symbol_map = IpynbSymbolMap.parse(notebook)
38
40
 
39
- transformed_blocks.prepend(transform_metadata(notebook_json)) if @include_metadata
40
- transformed_blocks.join("\n")
41
+ TransformedNotebook.new(transformed, symbol_map)
41
42
  end
42
43
 
43
- def decorate_cell(rows, cell)
44
- tags = cell['metadata']&.fetch('tags', [])
45
- type = cell['cell_type'] || 'raw'
44
+ def transform_document(notebook)
45
+ symbol = JsonSymbol.new('.cells')
46
46
 
47
- case @cell_decorator
48
- when :html
49
- rows.prepend(%(<div class="cell #{type}" data-id="#{cell['id']}" data-tags="#{tags&.join(' ')}">\n\n))
50
- .append("\n</div>\n")
51
- when :percent
52
- rows.prepend(%(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}\n\n))
53
- else
54
- rows
55
- end.join('')
56
- end
47
+ transformed_blocks = notebook['cells'].map.with_index do |cell, idx|
48
+ decorate_cell(transform_cell(cell, notebook, symbol / idx), cell, symbol / idx)
49
+ end
57
50
 
58
- def transform_cell(cell, notebook)
59
- cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook) : transform_text_cell(cell)
51
+ transformed_blocks.prepend(transform_metadata(notebook)) if @include_frontmatter
52
+ transformed_blocks.flatten
60
53
  end
61
54
 
62
- def decorate_output(output_rows, output)
63
- if @cell_decorator == :html
64
- output_rows.prepend(%(\n<div class="output #{output['output_type']}">\n\n)).append("\n</div>\n")
65
- else
66
- output_rows.prepend(%(\n%%%% Output: #{output['output_type']}\n\n))
67
- end
68
- end
55
+ def decorate_cell(rows, cell, symbol)
56
+ tags = cell['metadata']&.fetch('tags', [])
57
+ type = cell['cell_type'] || 'raw'
69
58
 
70
- def transform_code_cell(cell, notebook)
71
59
  [
72
- %(``` #{notebook.dig('metadata', 'kernelspec', 'language') || ''}\n),
73
- *cell['source'],
74
- "\n```\n",
75
- *cell['outputs'].map { |output| transform_output(output) }
60
+ _(symbol, %(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')})),
61
+ _,
62
+ rows,
63
+ _
76
64
  ]
77
65
  end
78
66
 
79
- def transform_output(output)
80
- transformed = @output_transformer.transform(output)
67
+ def transform_cell(cell, notebook, symbol)
68
+ cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook, symbol) : transform_text_cell(cell, symbol)
69
+ end
81
70
 
82
- decorate_output(transformed, output).join('') if transformed
71
+ def transform_code_cell(cell, notebook, symbol)
72
+ [
73
+ _(symbol / 'source', %(``` #{notebook.dig('metadata', 'kernelspec', 'language') || ''})),
74
+ symbolize_array(symbol / 'source', cell['source'], &:rstrip),
75
+ _(nil, '```'),
76
+ cell['outputs'].map.with_index do |output, idx|
77
+ @output_transformer.transform(output, symbol / ['outputs', idx])
78
+ end
79
+ ]
83
80
  end
84
81
 
85
- def transform_text_cell(cell)
86
- source = cell['source']
87
- (source.is_a?(Array) ? source : [source]).append("\n")
82
+ def transform_text_cell(cell, symbol)
83
+ symbolize_array(symbol / 'source', cell['source'], &:rstrip)
88
84
  end
89
85
 
90
86
  def transform_metadata(notebook_json)
91
- {
87
+ as_yaml = {
92
88
  'jupyter' => {
93
89
  'kernelspec' => notebook_json['metadata']['kernelspec'],
94
90
  'language_info' => notebook_json['metadata']['language_info'],
95
91
  'nbformat' => notebook_json['nbformat'],
96
92
  'nbformat_minor' => notebook_json['nbformat_minor']
97
93
  }
98
- }.to_yaml + "---\n"
94
+ }.to_yaml
95
+
96
+ as_yaml.split("\n").map { |l| _(nil, l) }.append(_(nil, '---'), _)
99
97
  end
100
98
  end
101
99
  end
data/lib/version.rb ADDED
@@ -0,0 +1,5 @@
1
+ # lib/version.rb
2
+
3
+ module IpynbDiff
4
+ VERSION = "0.4.2"
5
+ end
metadata CHANGED
@@ -1,41 +1,47 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ipynbdiff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.8
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo Bonet
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-11-18 00:00:00.000000000 Z
11
+ date: 2022-02-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: diffy
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '='
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 3.3.0
19
+ version: '3.3'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '='
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 3.3.0
26
+ version: '3.3'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: json
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '='
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '2.5'
34
+ - - ">="
32
35
  - !ruby/object:Gem::Version
33
36
  version: 2.5.1
34
37
  type: :runtime
35
38
  prerelease: false
36
39
  version_requirements: !ruby/object:Gem::Requirement
37
40
  requirements:
38
- - - '='
41
+ - - "~>"
42
+ - !ruby/object:Gem::Version
43
+ version: '2.5'
44
+ - - ">="
39
45
  - !ruby/object:Gem::Version
40
46
  version: 2.5.1
41
47
  - !ruby/object:Gem::Dependency
@@ -108,6 +114,20 @@ dependencies:
108
114
  - - ">="
109
115
  - !ruby/object:Gem::Version
110
116
  version: '0'
117
+ - !ruby/object:Gem::Dependency
118
+ name: rspec-parametized
119
+ requirement: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ version: '0'
124
+ type: :development
125
+ prerelease: false
126
+ version_requirements: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - ">="
129
+ - !ruby/object:Gem::Version
130
+ version: '0'
111
131
  description: Better diff for Jupyter Notebooks by first preprocessing them and removing
112
132
  clutter
113
133
  email: ebonet@gitlab.com
@@ -115,19 +135,28 @@ executables: []
115
135
  extensions: []
116
136
  extra_rdoc_files: []
117
137
  files:
138
+ - ".VERSION.TMPL"
118
139
  - ".gitignore"
119
140
  - ".gitlab-ci.yml"
141
+ - ".rubocop.yml"
142
+ - ".rubocop_todo.yml"
120
143
  - Gemfile
121
144
  - Gemfile.lock
122
145
  - README.md
123
146
  - ipynbdiff.gemspec
147
+ - lib/diff.rb
148
+ - lib/ipynb_symbol_map.rb
124
149
  - lib/ipynbdiff.rb
125
150
  - lib/output_transformer.rb
151
+ - lib/symbolized_markdown_helper.rb
152
+ - lib/transformed_notebook.rb
126
153
  - lib/transformer.rb
154
+ - lib/version.rb
127
155
  homepage: https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff
128
156
  licenses:
129
157
  - MIT
130
- metadata: {}
158
+ metadata:
159
+ rubygems_mfa_required: 'true'
131
160
  post_install_message:
132
161
  rdoc_options: []
133
162
  require_paths: