ipynbdiff 0.3.8 → 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fbeaad80969c974720e75336301dc02822fbf183b278d8e5ddd5dd18c65ddde1
4
- data.tar.gz: c186fa7fac873dff429cf3fd24c4a5cb61901670270850fd3d623c363db21182
3
+ metadata.gz: dd4b3e7e96694361ee62a498007e9bda9165b47b2ef9a31b41322cf8cdd8781f
4
+ data.tar.gz: 1680189fc74e0b8e8909d671ce3b77abff51e8ccd0d4304cf088b43a3254a311
5
5
  SHA512:
6
- metadata.gz: 23e6a0c192d671fdcb394334a89d40eef9a58e39d934d1138e0ba37943c928d035d2f471e868b2af3d7f24a02f77d927b1e72d8cd3dd8df1f6ef9616796a8ab0
7
- data.tar.gz: 9632766d4dd4e6e57775d59d7a14d01fb664156bd3c41b1af894481f4bf33f807b54ebad0f664184f606c37a6b8f818326e7238e320ba06135f192753c659f20
6
+ metadata.gz: fd5cf9cad8f9c20db2051a8b66a998a8174d066ca71e8a45c7bb5e1e274478919ed44fee1a38b296b7a5c679df686da96e402c750d51df0a68db7328260181a7
7
+ data.tar.gz: a7ba937ec71caa7a5a5a31cd47ea047f9b2518bd6c7f8a61f120b62deb7d3e6a07dc8ba76e8719b725dab8b6f78cb9b86771273a5a46d9798d744c2bc25d00bf
data/.VERSION.TMPL ADDED
@@ -0,0 +1,5 @@
1
+ # lib/version.rb
2
+
3
+ module IpynbDiff
4
+ VERSION = "GEM_VERSION"
5
+ end
data/.gitlab-ci.yml CHANGED
@@ -1,6 +1,43 @@
1
+ # You can override the included template(s) by including variable overrides
2
+ # SAST customization: https://docs.gitlab.com/ee/user/application_security/sast/#customizing-the-sast-settings
3
+ # Secret Detection customization: https://docs.gitlab.com/ee/user/application_security/secret_detection/#customizing-settings
4
+ # Dependency Scanning customization: https://docs.gitlab.com/ee/user/application_security/dependency_scanning/#customizing-the-dependency-scanning-settings
5
+ # Note that environment variables can be set in several places
6
+ # See https://docs.gitlab.com/ee/ci/variables/#cicd-variable-precedence
7
+ image: ruby:2.7
8
+ stages:
9
+ - test
10
+ - build
11
+ - rubygems
1
12
  specs:
2
13
  stage: test
3
- image: ruby:2.7
4
14
  script:
5
- - bundle install
6
- - bundle exec rspec
15
+ - bundle install
16
+ - bundle exec rspec
17
+ build-gem:
18
+ stage: build
19
+ script:
20
+ - bundle install
21
+ - cat .VERSION.TMPL | sed s/GEM_VERSION/0.0.0/ > lib/version.rb
22
+ - gem build ipynbdiff.gemspec
23
+ artifacts:
24
+ paths:
25
+ - ipynbdiff-0.0.0.gem
26
+ needs:
27
+ - specs
28
+ deploy-gem:
29
+ stage: rubygems
30
+ script:
31
+ - bundle install
32
+ - cat .VERSION.TMPL | sed s/GEM_VERSION/$CI_COMMIT_TAG/ > lib/version.rb
33
+ - gem build ipynbdiff.gemspec
34
+ - gem push ipynbdiff-$CI_COMMIT_TAG.gem
35
+ only:
36
+ - tags
37
+ except:
38
+ - branches
39
+ needs:
40
+ - build-gem
41
+ when: manual
42
+ include:
43
+ - template: Security/Dependency-Scanning.gitlab-ci.yml
data/.rubocop.yml ADDED
@@ -0,0 +1 @@
1
+ inherit_from: .rubocop_todo.yml
data/.rubocop_todo.yml ADDED
@@ -0,0 +1,31 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2021-12-22 14:13:29 UTC using RuboCop version 1.23.0.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 1
10
+ # Configuration parameters: Include.
11
+ # Include: **/*.gemspec
12
+ Gemspec/RequiredRubyVersion:
13
+ Exclude:
14
+ - 'ipynbdiff.gemspec'
15
+
16
+ AllCops:
17
+ NewCops: enable
18
+
19
+ Style/StringConcatenation:
20
+ Enabled: false
21
+
22
+ # Offense count: 6
23
+ # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
24
+ # IgnoredMethods: refine
25
+ Metrics/BlockLength:
26
+ Enabled: false
27
+
28
+ # Offense count: 3
29
+ # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
30
+ Metrics/MethodLength:
31
+ Enabled: false
data/Gemfile CHANGED
@@ -1,7 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- source "https://rubygems.org"
3
+ source 'https://rubygems.org'
4
4
 
5
5
  gem 'diffy', '3.3.0'
6
6
  gem 'json', '2.5.1'
7
7
  gem 'rspec', '3.10.0'
8
+ gem 'rspec-parameterized', '0.5.0'
data/Gemfile.lock CHANGED
@@ -1,9 +1,18 @@
1
1
  GEM
2
2
  remote: https://rubygems.org/
3
3
  specs:
4
+ ast (2.4.2)
5
+ binding_ninja (0.2.3)
6
+ coderay (1.1.3)
4
7
  diff-lcs (1.4.4)
5
8
  diffy (3.3.0)
6
9
  json (2.5.1)
10
+ parser (3.0.2.0)
11
+ ast (~> 2.4.1)
12
+ proc_to_ast (0.1.0)
13
+ coderay
14
+ parser
15
+ unparser
7
16
  rspec (3.10.0)
8
17
  rspec-core (~> 3.10.0)
9
18
  rspec-expectations (~> 3.10.0)
@@ -16,15 +25,26 @@ GEM
16
25
  rspec-mocks (3.10.2)
17
26
  diff-lcs (>= 1.2.0, < 2.0)
18
27
  rspec-support (~> 3.10.0)
28
+ rspec-parameterized (0.5.0)
29
+ binding_ninja (>= 0.2.3)
30
+ parser
31
+ proc_to_ast
32
+ rspec (>= 2.13, < 4)
33
+ unparser
19
34
  rspec-support (3.10.2)
35
+ unparser (0.6.0)
36
+ diff-lcs (~> 1.3)
37
+ parser (>= 3.0.0)
20
38
 
21
39
  PLATFORMS
40
+ ruby
22
41
  x86_64-darwin-20
23
42
 
24
43
  DEPENDENCIES
25
44
  diffy (= 3.3.0)
26
45
  json (= 2.5.1)
27
46
  rspec (= 3.10.0)
47
+ rspec-parameterized (= 0.5.0)
28
48
 
29
49
  BUNDLED WITH
30
- 2.2.29
50
+ 2.2.30
data/README.md CHANGED
@@ -7,10 +7,10 @@ that the entire file is readable on the diff.
7
7
 
8
8
  The results are diffs that are much easier to read:
9
9
 
10
- | Diff | IpynbDiff - HTML | IpynbDiff - Percent |
11
- | ------ | ------ | ------ |
12
- | [Here](example/diff.txt) | [Here](example/ipynbdiff.txt) | [Here](example/ipynbdiff_percent.txt) |
13
- | ![](example/img/diff.png) | ![](example/img/ipynbdiff_html.png) | ![](example/img/ipynbdiff_percent.png) |
10
+ | Diff | IpynbDiff |
11
+ | ------ | ------ |
12
+ | [Here](example/diff.txt) | [Here](example/ipynbdiff_percent.txt) |
13
+ | ![](example/img/diff.png) | ![](example/img/ipynbdiff_percent.png) |
14
14
 
15
15
 
16
16
  This started as a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff),
@@ -52,7 +52,6 @@ Options:
52
52
 
53
53
  ```ruby
54
54
  @default_transform_options = {
55
- include_metadata: false, # Whether to include or not the notebook metadata (kernel, language, etc)
56
- cell_decorator: :html # :html is useful to add styling with css, :percent is better for text format
55
+ include_frontmatter: false, # Whether to include the notebook metadata (kernel, language, etc.)
57
56
  }
58
57
  ```
data/ipynbdiff.gemspec CHANGED
@@ -1,29 +1,38 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ lib = File.expand_path('lib/..', __dir__)
4
+ $LOAD_PATH.unshift lib unless $LOAD_PATH.include?(lib)
5
+
6
+ require 'lib/version'
7
+
3
8
  Gem::Specification.new do |s|
4
9
  s.name = 'ipynbdiff'
5
- s.version = ENV['LIB_VERSION']
10
+ s.version = IpynbDiff::VERSION
6
11
  s.summary = 'Human Readable diffs for Jupyter Notebooks'
7
12
  s.description = 'Better diff for Jupyter Notebooks by first preprocessing them and removing clutter'
8
13
  s.authors = ['Eduardo Bonet']
9
14
  s.email = 'ebonet@gitlab.com'
10
15
  # Specify which files should be added to the gem when it is released.
11
16
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
12
- s.files = Dir.chdir(File.expand_path('..', __FILE__)) do
17
+ s.files = Dir.chdir(File.expand_path(__dir__)) do
13
18
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(spec|example)/}) }
14
19
  end
15
- s.homepage =
20
+ s.homepage =
16
21
  'https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff'
17
22
  s.license = 'MIT'
18
23
 
19
24
  s.require_paths = ['lib']
20
25
 
21
- s.add_runtime_dependency 'diffy', '3.3.0'
22
- s.add_runtime_dependency 'json', '2.5.1'
26
+ s.add_runtime_dependency 'diffy', '~> 3.3'
27
+ s.add_runtime_dependency 'json', '~> 2.5', '>= 2.5.1'
23
28
 
24
29
  s.add_development_dependency 'bundler', '~> 2.2'
25
30
  s.add_development_dependency 'guard-rspec'
26
31
  s.add_development_dependency 'pry'
27
32
  s.add_development_dependency 'rake'
28
33
  s.add_development_dependency 'rspec'
34
+ s.add_development_dependency 'rspec-parametized'
35
+ s.metadata = {
36
+ 'rubygems_mfa_required' => 'true'
37
+ }
29
38
  end
data/lib/diff.rb ADDED
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Custom differ for Jupyter Notebooks
4
+ module IpynbDiff
5
+ require 'delegate'
6
+
7
+ # The result of a diff object
8
+ class Diff < SimpleDelegator
9
+ require 'diffy'
10
+
11
+ attr_reader :from, :to
12
+
13
+ def initialize(from, to, diffy_opts)
14
+ super(Diffy::Diff.new(from.as_text, to.as_text, **diffy_opts))
15
+
16
+ @from = from
17
+ @to = to
18
+ end
19
+ end
20
+ end
data/lib/ipynb_symbol_map.rb ADDED
@@ -0,0 +1,151 @@
1
+ # frozen_string_literal: true
2
+
3
+ module IpynbDiff
4
+ class InvalidTokenError < StandardError
5
+ end
6
+
7
+ # Creates a symbol map for a ipynb file (JSON format)
8
+ class IpynbSymbolMap
9
+ class << self
10
+ def parse(notebook)
11
+ IpynbSymbolMap.new(notebook).parse('')
12
+ end
13
+ end
14
+
15
+ attr_reader :current_line, :char_idx, :results
16
+
17
+ WHITESPACE_CHARS = ["\t", "\r", ' ', "\n"].freeze
18
+
19
+ VALUE_STOPPERS = [',', '[', ']', '{', '}', *WHITESPACE_CHARS].freeze
20
+
21
+ def initialize(notebook)
22
+ @chars = notebook.chars
23
+ @current_line = 0
24
+ @char_idx = 0
25
+ @results = {}
26
+ end
27
+
28
+ def parse(prefix = '.')
29
+ skip_whitespaces
30
+
31
+ if (c = current_char) == '"'
32
+ parse_string
33
+ elsif c == '['
34
+ parse_array(prefix)
35
+ elsif c == '{'
36
+ parse_object(prefix)
37
+ else
38
+ parse_value
39
+ end
40
+
41
+ results
42
+ end
43
+
44
+ def parse_array(prefix)
45
+ # [1, 2, {"some": "object"}, [1]]
46
+
47
+ i = 0
48
+
49
+ current_should_be '['
50
+
51
+ loop do
52
+ break if skip_beginning(']')
53
+
54
+ new_prefix = "#{prefix}.#{i}"
55
+
56
+ add_result(new_prefix, current_line)
57
+
58
+ parse(new_prefix)
59
+
60
+ i += 1
61
+ end
62
+ end
63
+
64
+ def parse_object(prefix)
65
+ # {"name":"value", "another_name": [1, 2, 3]}
66
+
67
+ current_should_be '{'
68
+
69
+ loop do
70
+ break if skip_beginning('}')
71
+
72
+ prop_name = parse_string
73
+
74
+ new_prefix = "#{prefix}.#{prop_name}"
75
+
76
+ add_result(new_prefix, current_line)
77
+
78
+ next_and_skip_whitespaces
79
+
80
+ current_should_be ':'
81
+
82
+ next_and_skip_whitespaces
83
+
84
+ parse(new_prefix)
85
+ end
86
+ end
87
+
88
+ def parse_string
89
+ value = ''
90
+ prev_char = nil
91
+
92
+ current_should_be '"'
93
+
94
+ loop do
95
+ increment_char_index
96
+ break if (c = current_char) == '"' && prev_char != '\\'
97
+
98
+ value += (prev_char = c)
99
+ end
100
+
101
+ value
102
+ end
103
+
104
+ def add_result(key, line_number)
105
+ @results[key] = line_number
106
+ end
107
+
108
+ def parse_value
109
+ increment_char_index until VALUE_STOPPERS.include?(current_char)
110
+ end
111
+
112
+ def skip_whitespaces
113
+ while WHITESPACE_CHARS.include?(current_char)
114
+ check_for_new_line
115
+ increment_char_index
116
+ end
117
+ end
118
+
119
+ def increment_char_index
120
+ @char_idx += 1
121
+ end
122
+
123
+ def next_and_skip_whitespaces
124
+ increment_char_index
125
+ skip_whitespaces
126
+ end
127
+
128
+ def current_char
129
+ @chars[@char_idx]
130
+ end
131
+
132
+ def current_should_be(another_char)
133
+ raise InvalidTokenError unless current_char == another_char
134
+ end
135
+
136
+ def check_for_new_line
137
+ @current_line += 1 if current_char == "\n"
138
+ end
139
+
140
+ def skip_beginning(closing_char)
141
+
142
+ check_for_new_line
143
+
144
+ next_and_skip_whitespaces
145
+
146
+ return true if current_char == closing_char
147
+
148
+ next_and_skip_whitespaces if current_char == ','
149
+ end
150
+ end
151
+ end
data/lib/ipynbdiff.rb CHANGED
@@ -3,55 +3,20 @@
3
3
  # Human Readable Jupyter Diffs
4
4
  module IpynbDiff
5
5
  require 'transformer'
6
- require 'diffy'
6
+ require 'diff'
7
7
 
8
- @default_transform_options = {
9
- include_metadata: false,
10
- cell_decorator: :html
11
- }
8
+ def self.diff(from, to, raise_if_invalid_nb: false, include_frontmatter: false, diffy_opts: {})
9
+ transformer = Transformer.new(include_frontmatter: include_frontmatter)
12
10
 
13
- @default_diff_options = {
14
- preprocess_input: true,
15
- write_output_to: nil,
16
- format: :text,
17
- sources_are_files: false,
18
- raise_if_invalid_notebook: false,
19
- transform_options: @default_transform_options,
20
- diff_opts: {
21
- include_diff_info: false
22
- }
23
- }.freeze
24
-
25
- def self.prepare_input(to_prepare, options)
26
- return '' unless to_prepare
27
-
28
- prep = to_prepare
29
- prep = File.read(prep) if options[:sources_are_files]
30
- prep = transform(prep, raise_errors: true, options: options[:transform_options]) if options[:preprocess_input]
31
- prep
32
- end
33
-
34
- def self.diff(
35
- from_notebook,
36
- to_notebook,
37
- options = @default_diff_options
38
- )
39
- options = @default_diff_options.merge(options)
40
-
41
- from = prepare_input(from_notebook, options)
42
- to = prepare_input(to_notebook, options)
43
-
44
- d = Diffy::Diff.new(from, to, **options[:diff_opts]).to_s(options[:format])
45
- File.write(options[:write_output_to], d) if options[:write_output_to]
46
- d
11
+ Diff.new(transformer.transform(from), transformer.transform(to), diffy_opts)
47
12
  rescue InvalidNotebookError
48
- raise if options[:raise_if_invalid_notebook]
13
+ raise if raise_if_invalid_nb
49
14
  end
50
15
 
51
- def self.transform(notebook, raise_errors: false, options: @default_transform_options)
52
- options = @default_transform_options.merge(options)
16
+ def self.transform(notebook, raise_errors: false, include_frontmatter: true)
17
+ return unless notebook
53
18
 
54
- Transformer.new(**options).transform(notebook)
19
+ Transformer.new(include_frontmatter: include_frontmatter).transform(notebook).as_text
55
20
  rescue InvalidNotebookError
56
21
  raise if raise_errors
57
22
  end
data/lib/output_transformer.rb CHANGED
@@ -1,65 +1,79 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module IpynbDiff
4
-
5
4
  # Transforms Jupyter output data into markdown
6
5
  class OutputTransformer
6
+ require 'symbolized_markdown_helper'
7
+ include SymbolizedMarkdownHelper
7
8
 
8
9
  ORDERED_KEYS = {
9
10
  'execute_result' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex text/plain],
10
- 'display_data' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex]
11
+ 'display_data' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex],
12
+ 'stream' => %w[text]
11
13
  }.freeze
12
14
 
13
- def transform(output)
14
- case (output_type = output['output_type'])
15
- when 'error'
16
- transform_error(output['traceback'])
17
- when 'execute_result', 'display_data'
18
- transform_non_error(ORDERED_KEYS[output_type], output['data'])
19
- end
15
+ def transform(output, symbol)
16
+ transformed = case (output_type = output['output_type'])
17
+ when 'error'
18
+ transform_error(output['traceback'], symbol / 'traceback')
19
+ when 'execute_result', 'display_data'
20
+ transform_non_error(ORDERED_KEYS[output_type], output['data'], symbol / 'data')
21
+ when 'stream'
22
+ transform_element('text', output['text'], symbol)
23
+ end
24
+
25
+ transformed ? decorate_output(transformed, output, symbol) : []
20
26
  end
21
27
 
22
- def transform_error(traceback)
23
- traceback.map do |t|
24
- t.split("\n").map do |line|
25
- line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip << "\n"
28
+ def decorate_output(output_rows, output, symbol)
29
+ [
30
+ _,
31
+ _(symbol, %(%%%% Output: #{output['output_type']})),
32
+ _,
33
+ *output_rows
34
+ ]
35
+ end
36
+
37
+ def transform_error(traceback, symbol)
38
+ traceback.map.with_index do |t, idx|
39
+ t.split("\n").map do |l|
40
+ _(symbol / idx, l.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip)
26
41
  end
27
42
  end
28
43
  end
29
44
 
30
- def transform_non_error(accepted_keys, elements)
31
- accepted_keys.map do |key|
32
- transform_element(key, elements[key]) if elements.key?(key)
33
- end.flatten
45
+ def transform_non_error(accepted_keys, elements, symbol)
46
+ accepted_keys.filter { |key| elements.key?(key) }.map do |key|
47
+ transform_element(key, elements[key], symbol)
48
+ end
34
49
  end
35
50
 
36
- def transform_element(output_type, output_element)
51
+ def transform_element(output_type, output_element, symbol_prefix)
52
+ new_symbol = symbol_prefix / output_type
37
53
  case output_type
38
54
  when 'image/png', 'image/jpeg'
39
- transform_image(output_type, output_element)
55
+ transform_image(output_type, output_element, new_symbol)
40
56
  when 'image/svg+xml'
41
- transform_svg(output_element)
42
- when 'text/markdown', 'text/latex', 'text/plain'
43
- transform_text(output_element)
57
+ transform_svg(output_element, new_symbol)
58
+ when 'text/markdown', 'text/latex', 'text/plain', 'text'
59
+ transform_text(output_element, new_symbol)
44
60
  end
45
61
  end
46
62
 
47
- def transform_image(image_type, image_content)
48
- [" ![](data:#{image_type};base64,#{image_content.gsub("\n", '')})", "\n"]
63
+ def transform_image(image_type, image_content, symbol)
64
+ _(symbol, " ![](data:#{image_type};base64,#{image_content.gsub("\n", '')})")
49
65
  end
50
66
 
51
- def transform_svg(image_content)
67
+ def transform_svg(image_content, symbol)
52
68
  lines = image_content.is_a?(Array) ? image_content : [image_content]
53
69
 
54
- single_line = lines.map(&:strip).join('').gsub(/\s+/, ' ')
70
+ single_line = lines.map(&:strip).join.gsub(/\s+/, ' ')
55
71
 
56
- [" ![](data:image/svg+xml;utf8,#{single_line})", "\n"]
72
+ _(symbol, " ![](data:image/svg+xml;utf8,#{single_line})")
57
73
  end
58
74
 
59
- def transform_text(text_content)
60
- lines = text_content.is_a?(Array) ? text_content : [text_content]
61
-
62
- lines.map { |line| " #{line}" }.append("\n")
75
+ def transform_text(text_content, symbol)
76
+ symbolize_array(symbol, text_content) { |l| " #{l.rstrip}" }
63
77
  end
64
78
  end
65
79
  end
data/lib/symbolized_markdown_helper.rb ADDED
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ module IpynbDiff
4
+ # Helper functions
5
+ module SymbolizedMarkdownHelper
6
+
7
+ def _(symbol = nil, content = '')
8
+ { symbol: symbol, content: content }
9
+ end
10
+
11
+ def array_if_not_array(thing)
12
+ thing.is_a?(Array) ? thing : [thing]
13
+ end
14
+
15
+ def symbolize_array(symbol, content, &block)
16
+ if content.is_a?(Array)
17
+ content.map.with_index { |l, idx| _(symbol / idx, block.call(l)) }
18
+ else
19
+ _(symbol, content)
20
+ end
21
+ end
22
+ end
23
+
24
+ # Simple wrapper for a string
25
+ class JsonSymbol < String
26
+ def /(other)
27
+ JsonSymbol.new((other.is_a?(Array) ? [self, *other] : [self, other]).join('.'))
28
+ end
29
+ end
30
+ end
data/lib/transformed_notebook.rb ADDED
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module IpynbDiff
4
+ # Notebook that was transformed into md, including location of source cells
5
+ class TransformedNotebook
6
+ attr_reader :blocks
7
+
8
+ def as_text
9
+ @blocks.map { |b| b[:content] }.join("\n")
10
+ end
11
+
12
+ private
13
+
14
+ def initialize(lines = [], symbol_map = {})
15
+ @blocks = lines.map do |line|
16
+ { content: line[:content], source_symbol: (symbol = line[:symbol]), source_line: symbol && symbol_map[symbol] }
17
+ end
18
+ end
19
+ end
20
+ end
data/lib/transformer.rb CHANGED
@@ -9,14 +9,15 @@ module IpynbDiff
9
9
  require 'json'
10
10
  require 'yaml'
11
11
  require 'output_transformer'
12
+ require 'symbolized_markdown_helper'
13
+ require 'ipynb_symbol_map'
14
+ require 'transformed_notebook'
15
+ include SymbolizedMarkdownHelper
12
16
 
13
- @cell_decorator = :html
14
- @include_metadata = true
17
+ @include_frontmatter = true
15
18
 
16
-
17
- def initialize(include_metadata: true, cell_decorator: :html)
18
- @include_metadata = include_metadata
19
- @cell_decorator = cell_decorator
19
+ def initialize(include_frontmatter: true)
20
+ @include_frontmatter = include_frontmatter
20
21
  @output_transformer = OutputTransformer.new
21
22
  end
22
23
 
@@ -31,71 +32,68 @@ module IpynbDiff
31
32
  end
32
33
 
33
34
  def transform(notebook)
35
+ return TransformedNotebook.new unless notebook
36
+
34
37
  notebook_json = validate_notebook(notebook)
35
- transformed_blocks = notebook_json['cells'].map do |cell|
36
- decorate_cell(transform_cell(cell, notebook_json), cell)
37
- end
38
+ transformed = transform_document(notebook_json)
39
+ symbol_map = IpynbSymbolMap.parse(notebook)
38
40
 
39
- transformed_blocks.prepend(transform_metadata(notebook_json)) if @include_metadata
40
- transformed_blocks.join("\n")
41
+ TransformedNotebook.new(transformed, symbol_map)
41
42
  end
42
43
 
43
- def decorate_cell(rows, cell)
44
- tags = cell['metadata']&.fetch('tags', [])
45
- type = cell['cell_type'] || 'raw'
44
+ def transform_document(notebook)
45
+ symbol = JsonSymbol.new('.cells')
46
46
 
47
- case @cell_decorator
48
- when :html
49
- rows.prepend(%(<div class="cell #{type}" data-id="#{cell['id']}" data-tags="#{tags&.join(' ')}">\n\n))
50
- .append("\n</div>\n")
51
- when :percent
52
- rows.prepend(%(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}\n\n))
53
- else
54
- rows
55
- end.join('')
56
- end
47
+ transformed_blocks = notebook['cells'].map.with_index do |cell, idx|
48
+ decorate_cell(transform_cell(cell, notebook, symbol / idx), cell, symbol / idx)
49
+ end
57
50
 
58
- def transform_cell(cell, notebook)
59
- cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook) : transform_text_cell(cell)
51
+ transformed_blocks.prepend(transform_metadata(notebook)) if @include_frontmatter
52
+ transformed_blocks.flatten
60
53
  end
61
54
 
62
- def decorate_output(output_rows, output)
63
- if @cell_decorator == :html
64
- output_rows.prepend(%(\n<div class="output #{output['output_type']}">\n\n)).append("\n</div>\n")
65
- else
66
- output_rows.prepend(%(\n%%%% Output: #{output['output_type']}\n\n))
67
- end
68
- end
55
+ def decorate_cell(rows, cell, symbol)
56
+ tags = cell['metadata']&.fetch('tags', [])
57
+ type = cell['cell_type'] || 'raw'
69
58
 
70
- def transform_code_cell(cell, notebook)
71
59
  [
72
- %(``` #{notebook.dig('metadata', 'kernelspec', 'language') || ''}\n),
73
- *cell['source'],
74
- "\n```\n",
75
- *cell['outputs'].map { |output| transform_output(output) }
60
+ _(symbol, %(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')})),
61
+ _,
62
+ rows,
63
+ _
76
64
  ]
77
65
  end
78
66
 
79
- def transform_output(output)
80
- transformed = @output_transformer.transform(output)
67
+ def transform_cell(cell, notebook, symbol)
68
+ cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook, symbol) : transform_text_cell(cell, symbol)
69
+ end
81
70
 
82
- decorate_output(transformed, output).join('') if transformed
71
+ def transform_code_cell(cell, notebook, symbol)
72
+ [
73
+ _(symbol / 'source', %(``` #{notebook.dig('metadata', 'kernelspec', 'language') || ''})),
74
+ symbolize_array(symbol / 'source', cell['source'], &:rstrip),
75
+ _(nil, '```'),
76
+ cell['outputs'].map.with_index do |output, idx|
77
+ @output_transformer.transform(output, symbol / ['outputs', idx])
78
+ end
79
+ ]
83
80
  end
84
81
 
85
- def transform_text_cell(cell)
86
- source = cell['source']
87
- (source.is_a?(Array) ? source : [source]).append("\n")
82
+ def transform_text_cell(cell, symbol)
83
+ symbolize_array(symbol / 'source', cell['source'], &:rstrip)
88
84
  end
89
85
 
90
86
  def transform_metadata(notebook_json)
91
- {
87
+ as_yaml = {
92
88
  'jupyter' => {
93
89
  'kernelspec' => notebook_json['metadata']['kernelspec'],
94
90
  'language_info' => notebook_json['metadata']['language_info'],
95
91
  'nbformat' => notebook_json['nbformat'],
96
92
  'nbformat_minor' => notebook_json['nbformat_minor']
97
93
  }
98
- }.to_yaml + "---\n"
94
+ }.to_yaml
95
+
96
+ as_yaml.split("\n").map { |l| _(nil, l) }.append(_(nil, '---'), _)
99
97
  end
100
98
  end
101
99
  end
data/lib/version.rb ADDED
@@ -0,0 +1,5 @@
1
+ # lib/version.rb
2
+
3
+ module IpynbDiff
4
+ VERSION = "0.4.2"
5
+ end
metadata CHANGED
@@ -1,41 +1,47 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ipynbdiff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.8
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo Bonet
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-11-18 00:00:00.000000000 Z
11
+ date: 2022-02-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: diffy
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - '='
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 3.3.0
19
+ version: '3.3'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - '='
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 3.3.0
26
+ version: '3.3'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: json
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - '='
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '2.5'
34
+ - - ">="
32
35
  - !ruby/object:Gem::Version
33
36
  version: 2.5.1
34
37
  type: :runtime
35
38
  prerelease: false
36
39
  version_requirements: !ruby/object:Gem::Requirement
37
40
  requirements:
38
- - - '='
41
+ - - "~>"
42
+ - !ruby/object:Gem::Version
43
+ version: '2.5'
44
+ - - ">="
39
45
  - !ruby/object:Gem::Version
40
46
  version: 2.5.1
41
47
  - !ruby/object:Gem::Dependency
@@ -108,6 +114,20 @@ dependencies:
108
114
  - - ">="
109
115
  - !ruby/object:Gem::Version
110
116
  version: '0'
117
+ - !ruby/object:Gem::Dependency
118
+ name: rspec-parametized
119
+ requirement: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ version: '0'
124
+ type: :development
125
+ prerelease: false
126
+ version_requirements: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - ">="
129
+ - !ruby/object:Gem::Version
130
+ version: '0'
111
131
  description: Better diff for Jupyter Notebooks by first preprocessing them and removing
112
132
  clutter
113
133
  email: ebonet@gitlab.com
@@ -115,19 +135,28 @@ executables: []
115
135
  extensions: []
116
136
  extra_rdoc_files: []
117
137
  files:
138
+ - ".VERSION.TMPL"
118
139
  - ".gitignore"
119
140
  - ".gitlab-ci.yml"
141
+ - ".rubocop.yml"
142
+ - ".rubocop_todo.yml"
120
143
  - Gemfile
121
144
  - Gemfile.lock
122
145
  - README.md
123
146
  - ipynbdiff.gemspec
147
+ - lib/diff.rb
148
+ - lib/ipynb_symbol_map.rb
124
149
  - lib/ipynbdiff.rb
125
150
  - lib/output_transformer.rb
151
+ - lib/symbolized_markdown_helper.rb
152
+ - lib/transformed_notebook.rb
126
153
  - lib/transformer.rb
154
+ - lib/version.rb
127
155
  homepage: https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff
128
156
  licenses:
129
157
  - MIT
130
- metadata: {}
158
+ metadata:
159
+ rubygems_mfa_required: 'true'
131
160
  post_install_message:
132
161
  rdoc_options: []
133
162
  require_paths: