ipynbdiff 0.3.4 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d87c49e29a069b5064985b1b1097ac4d02c375161d9fe64df8e70f0155419220
4
- data.tar.gz: 97931332962139f7f129a546bb7a7f9b670d779dcb032b27fbb1a3faa6f7dc85
3
+ metadata.gz: fbeaad80969c974720e75336301dc02822fbf183b278d8e5ddd5dd18c65ddde1
4
+ data.tar.gz: c186fa7fac873dff429cf3fd24c4a5cb61901670270850fd3d623c363db21182
5
5
  SHA512:
6
- metadata.gz: 86c4be0fc53e0db2172221a17dd07e0c3f37492ffc699075b0de74dea3265feebee2207ef4e9170ead59be15c42b858918fbd2e56c7ed109cb674fbf7eb7dbbb
7
- data.tar.gz: 10fdb124114a73425edc57f5c9fb80fbd546f6b8e7d58929a327f1be268774d8409238630286aa050e41384bf53c951d2b8c67aaeb76835bc3e0f37672775d48
6
+ metadata.gz: 23e6a0c192d671fdcb394334a89d40eef9a58e39d934d1138e0ba37943c928d035d2f471e868b2af3d7f24a02f77d927b1e72d8cd3dd8df1f6ef9616796a8ab0
7
+ data.tar.gz: 9632766d4dd4e6e57775d59d7a14d01fb664156bd3c41b1af894481f4bf33f807b54ebad0f664184f606c37a6b8f818326e7238e320ba06135f192753c659f20
data/.gitlab-ci.yml ADDED
@@ -0,0 +1,6 @@
1
+ specs:
2
+ stage: test
3
+ image: ruby:2.7
4
+ script:
5
+ - bundle install
6
+ - bundle exec rspec
data/Gemfile CHANGED
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ source "https://rubygems.org"
4
+
3
5
  gem 'diffy', '3.3.0'
4
6
  gem 'json', '2.5.1'
5
7
  gem 'rspec', '3.10.0'
data/Gemfile.lock CHANGED
@@ -1,4 +1,5 @@
1
1
  GEM
2
+ remote: https://rubygems.org/
2
3
  specs:
3
4
  diff-lcs (1.4.4)
4
5
  diffy (3.3.0)
@@ -26,4 +27,4 @@ DEPENDENCIES
26
27
  rspec (= 3.10.0)
27
28
 
28
29
  BUNDLED WITH
29
- 2.2.28
30
+ 2.2.29
data/README.md CHANGED
@@ -28,15 +28,16 @@ Options:
28
28
 
29
29
  ```ruby
30
30
  @default_transform_options = {
31
- preprocess_input: TRUE, # Whether the input should be transformed
31
+ preprocess_input: true, # Whether the input should be transformed
32
32
  write_output_to: nil, # Pass a path to save the output to a file
33
33
  format: :text, # These are the formats Diffy accepts https://github.com/samg/diffy
34
- sources_are_files: FALSE, # Whether to use the from/to as string or path to a file
34
+ sources_are_files: false, # Whether to use the from/to as string or path to a file
35
+ raise_if_invalid_notebook: false, # Raises an error if the notebooks are invalid, otherwise returns nil
35
36
  transform_options: @default_transform_options, # See below for transform options
36
37
  diff_opts: {
37
- include_diff_info: FALSE # These are passed to Diffy https://github.com/samg/diffy
38
+ include_diff_info: false # These are passed to Diffy https://github.com/samg/diffy
38
39
  }
39
- }
40
+ }
40
41
  ```
41
42
 
42
43
  ### Transforming the notebooks
@@ -51,7 +52,7 @@ Options:
51
52
 
52
53
  ```ruby
53
54
  @default_transform_options = {
54
- include_metadata: FALSE, # Whether or not to include the notebook metadata (kernel, language, etc)
55
+ include_metadata: false, # Whether or not to include the notebook metadata (kernel, language, etc)
55
56
  cell_decorator: :html # :html is useful to add styling with css, :percent is better for text format
56
57
  }
57
58
  ```
data/ipynbdiff.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'ipynbdiff'
5
- s.version = '0.3.4'
5
+ s.version = ENV['LIB_VERSION']
6
6
  s.summary = 'Human Readable diffs for Jupyter Notebooks'
7
7
  s.description = 'Better diff for Jupyter Notebooks by first preprocessing them and removing clutter'
8
8
  s.authors = ['Eduardo Bonet']
data/lib/ipynbdiff.rb CHANGED
@@ -6,48 +6,53 @@ module IpynbDiff
6
6
  require 'diffy'
7
7
 
8
8
  @default_transform_options = {
9
- include_metadata: FALSE,
9
+ include_metadata: false,
10
10
  cell_decorator: :html
11
11
  }
12
12
 
13
13
  @default_diff_options = {
14
- preprocess_input: TRUE,
14
+ preprocess_input: true,
15
15
  write_output_to: nil,
16
16
  format: :text,
17
- sources_are_files: FALSE,
17
+ sources_are_files: false,
18
+ raise_if_invalid_notebook: false,
18
19
  transform_options: @default_transform_options,
19
20
  diff_opts: {
20
- include_diff_info: FALSE
21
+ include_diff_info: false
21
22
  }
22
23
  }.freeze
23
24
 
24
25
  def self.prepare_input(to_prepare, options)
25
- prepared = to_prepare
26
- prepared = File.read(prepared) if options[:sources_are_files]
27
- prepared = transform(prepared, options[:transform_options]) if options[:preprocess_input]
26
+ return '' unless to_prepare
28
27
 
29
- prepared
28
+ prep = to_prepare
29
+ prep = File.read(prep) if options[:sources_are_files]
30
+ prep = transform(prep, raise_errors: true, options: options[:transform_options]) if options[:preprocess_input]
31
+ prep
30
32
  end
31
33
 
32
34
  def self.diff(
33
35
  from_notebook,
34
36
  to_notebook,
35
- options = {}
37
+ options = @default_diff_options
36
38
  )
37
39
  options = @default_diff_options.merge(options)
38
40
 
39
- from = from_notebook && prepare_input(from_notebook, options) || ''
40
- to = to_notebook && prepare_input(to_notebook, options) || ''
41
+ from = prepare_input(from_notebook, options)
42
+ to = prepare_input(to_notebook, options)
41
43
 
42
44
  d = Diffy::Diff.new(from, to, **options[:diff_opts]).to_s(options[:format])
43
-
44
45
  File.write(options[:write_output_to], d) if options[:write_output_to]
45
-
46
46
  d
47
+ rescue InvalidNotebookError
48
+ raise if options[:raise_if_invalid_notebook]
47
49
  end
48
50
 
49
- def self.transform(notebook, options)
51
+ def self.transform(notebook, raise_errors: false, options: @default_transform_options)
50
52
  options = @default_transform_options.merge(options)
53
+
51
54
  Transformer.new(**options).transform(notebook)
55
+ rescue InvalidNotebookError
56
+ raise if raise_errors
52
57
  end
53
58
  end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ module IpynbDiff
4
+
5
+ # Transforms Jupyter output data into markdown
6
+ class OutputTransformer
7
+
8
+ ORDERED_KEYS = {
9
+ 'execute_result' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex text/plain],
10
+ 'display_data' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex]
11
+ }.freeze
12
+
13
+ def transform(output)
14
+ case (output_type = output['output_type'])
15
+ when 'error'
16
+ transform_error(output['traceback'])
17
+ when 'execute_result', 'display_data'
18
+ transform_non_error(ORDERED_KEYS[output_type], output['data'])
19
+ end
20
+ end
21
+
22
+ def transform_error(traceback)
23
+ traceback.map do |t|
24
+ t.split("\n").map do |line|
25
+ line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip << "\n"
26
+ end
27
+ end
28
+ end
29
+
30
+ def transform_non_error(accepted_keys, elements)
31
+ accepted_keys.map do |key|
32
+ transform_element(key, elements[key]) if elements.key?(key)
33
+ end.flatten
34
+ end
35
+
36
+ def transform_element(output_type, output_element)
37
+ case output_type
38
+ when 'image/png', 'image/jpeg'
39
+ transform_image(output_type, output_element)
40
+ when 'image/svg+xml'
41
+ transform_svg(output_element)
42
+ when 'text/markdown', 'text/latex', 'text/plain'
43
+ transform_text(output_element)
44
+ end
45
+ end
46
+
47
+ def transform_image(image_type, image_content)
48
+ [" ![](data:#{image_type};base64,#{image_content.gsub("\n", '')})", "\n"]
49
+ end
50
+
51
+ def transform_svg(image_content)
52
+ lines = image_content.is_a?(Array) ? image_content : [image_content]
53
+
54
+ single_line = lines.map(&:strip).join('').gsub(/\s+/, ' ')
55
+
56
+ [" ![](data:image/svg+xml;utf8,#{single_line})", "\n"]
57
+ end
58
+
59
+ def transform_text(text_content)
60
+ lines = text_content.is_a?(Array) ? text_content : [text_content]
61
+
62
+ lines.map { |line| " #{line}" }.append("\n")
63
+ end
64
+ end
65
+ end
data/lib/transformer.rb CHANGED
@@ -1,21 +1,37 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module IpynbDiff
4
+ class InvalidNotebookError < StandardError
5
+ end
6
+
4
7
  # Returns a markdown version of the Jupyter Notebook
5
8
  class Transformer
6
9
  require 'json'
7
10
  require 'yaml'
11
+ require 'output_transformer'
8
12
 
9
13
  @cell_decorator = :html
10
- @include_metadata = TRUE
14
+ @include_metadata = true
15
+
11
16
 
12
- def initialize(include_metadata: TRUE, cell_decorator: :html)
17
+ def initialize(include_metadata: true, cell_decorator: :html)
13
18
  @include_metadata = include_metadata
14
19
  @cell_decorator = cell_decorator
20
+ @output_transformer = OutputTransformer.new
15
21
  end
16
22
 
17
- def transform(notebook)
23
+ def validate_notebook(notebook)
18
24
  notebook_json = JSON.parse(notebook)
25
+
26
+ return notebook_json if notebook_json.key?('cells')
27
+
28
+ raise InvalidNotebookError
29
+ rescue JSON::ParserError
30
+ raise InvalidNotebookError
31
+ end
32
+
33
+ def transform(notebook)
34
+ notebook_json = validate_notebook(notebook)
19
35
  transformed_blocks = notebook_json['cells'].map do |cell|
20
36
  decorate_cell(transform_cell(cell, notebook_json), cell)
21
37
  end
@@ -26,7 +42,7 @@ module IpynbDiff
26
42
 
27
43
  def decorate_cell(rows, cell)
28
44
  tags = cell['metadata']&.fetch('tags', [])
29
- type = cell['cell_type']
45
+ type = cell['cell_type'] || 'raw'
30
46
 
31
47
  case @cell_decorator
32
48
  when :html
@@ -53,51 +69,22 @@ module IpynbDiff
53
69
 
54
70
  def transform_code_cell(cell, notebook)
55
71
  [
56
- %(``` #{notebook['metadata']['kernelspec']['language']}\n),
72
+ %(``` #{notebook.dig('metadata', 'kernelspec', 'language') || ''}\n),
57
73
  *cell['source'],
58
74
  "\n```\n",
59
75
  *cell['outputs'].map { |output| transform_output(output) }
60
76
  ]
61
77
  end
62
78
 
63
- def format_traceback(traceback)
64
- traceback.map do |t|
65
- t.split("\n").map do |line|
66
- line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip << "\n"
67
- end
68
- end
69
- end
70
-
71
- def transform_execute_result(output)
72
- output['data']['text/plain'].map { |line| " #{line}" }.append("\n")
73
- end
74
-
75
- def transform_image_result(output)
76
- if output['data'].key?('image/png')
77
- ["![](data:image/png;base64,#{output['data']['image/png'].gsub("\n", '')})", "\n"]
78
- end
79
- end
80
-
81
- def transform_error_result(output)
82
- format_traceback(output['traceback'])
83
- end
84
-
85
79
  def transform_output(output)
86
- transformed =
87
- case output['output_type']
88
- when 'execute_result'
89
- transform_execute_result(output)
90
- when 'display_data'
91
- transform_image_result(output)
92
- when 'error'
93
- transform_error_result(output)
94
- end
80
+ transformed = @output_transformer.transform(output)
95
81
 
96
82
  decorate_output(transformed, output).join('') if transformed
97
83
  end
98
84
 
99
85
  def transform_text_cell(cell)
100
- cell['source'].append("\n")
86
+ source = cell['source']
87
+ (source.is_a?(Array) ? source : [source]).append("\n")
101
88
  end
102
89
 
103
90
  def transform_metadata(notebook_json)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ipynbdiff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: 0.3.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo Bonet
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-18 00:00:00.000000000 Z
11
+ date: 2021-11-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: diffy
@@ -116,11 +116,13 @@ extensions: []
116
116
  extra_rdoc_files: []
117
117
  files:
118
118
  - ".gitignore"
119
+ - ".gitlab-ci.yml"
119
120
  - Gemfile
120
121
  - Gemfile.lock
121
122
  - README.md
122
123
  - ipynbdiff.gemspec
123
124
  - lib/ipynbdiff.rb
125
+ - lib/output_transformer.rb
124
126
  - lib/transformer.rb
125
127
  homepage: https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff
126
128
  licenses: