ipynbdiff 0.3.4 → 0.3.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d87c49e29a069b5064985b1b1097ac4d02c375161d9fe64df8e70f0155419220
4
- data.tar.gz: 97931332962139f7f129a546bb7a7f9b670d779dcb032b27fbb1a3faa6f7dc85
3
+ metadata.gz: fbeaad80969c974720e75336301dc02822fbf183b278d8e5ddd5dd18c65ddde1
4
+ data.tar.gz: c186fa7fac873dff429cf3fd24c4a5cb61901670270850fd3d623c363db21182
5
5
  SHA512:
6
- metadata.gz: 86c4be0fc53e0db2172221a17dd07e0c3f37492ffc699075b0de74dea3265feebee2207ef4e9170ead59be15c42b858918fbd2e56c7ed109cb674fbf7eb7dbbb
7
- data.tar.gz: 10fdb124114a73425edc57f5c9fb80fbd546f6b8e7d58929a327f1be268774d8409238630286aa050e41384bf53c951d2b8c67aaeb76835bc3e0f37672775d48
6
+ metadata.gz: 23e6a0c192d671fdcb394334a89d40eef9a58e39d934d1138e0ba37943c928d035d2f471e868b2af3d7f24a02f77d927b1e72d8cd3dd8df1f6ef9616796a8ab0
7
+ data.tar.gz: 9632766d4dd4e6e57775d59d7a14d01fb664156bd3c41b1af894481f4bf33f807b54ebad0f664184f606c37a6b8f818326e7238e320ba06135f192753c659f20
data/.gitlab-ci.yml ADDED
@@ -0,0 +1,6 @@
1
+ specs:
2
+ stage: test
3
+ image: ruby:2.7
4
+ script:
5
+ - bundle install
6
+ - bundle exec rspec
data/Gemfile CHANGED
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ source "https://rubygems.org"
4
+
3
5
  gem 'diffy', '3.3.0'
4
6
  gem 'json', '2.5.1'
5
7
  gem 'rspec', '3.10.0'
data/Gemfile.lock CHANGED
@@ -1,4 +1,5 @@
1
1
  GEM
2
+ remote: https://rubygems.org/
2
3
  specs:
3
4
  diff-lcs (1.4.4)
4
5
  diffy (3.3.0)
@@ -26,4 +27,4 @@ DEPENDENCIES
26
27
  rspec (= 3.10.0)
27
28
 
28
29
  BUNDLED WITH
29
- 2.2.28
30
+ 2.2.29
data/README.md CHANGED
@@ -28,15 +28,16 @@ Options:
28
28
 
29
29
  ```ruby
30
30
  @default_transform_options = {
31
- preprocess_input: TRUE, # Whether the input should be transformed
31
+ preprocess_input: true, # Whether the input should be transformed
32
32
  write_output_to: nil, # Pass a path to save the output to a file
33
33
  format: :text, # These are the formats Diffy accepts https://github.com/samg/diffy
34
- sources_are_files: FALSE, # Weather to use the from/to as string or path to a file
34
+ sources_are_files: false, # Whether to use the from/to as string or path to a file
35
+ raise_if_invalid_notebook: false, # Raises an error if the notebooks are invalid, otherwise returns nil
35
36
  transform_options: @default_transform_options, # See below for transform options
36
37
  diff_opts: {
37
- include_diff_info: FALSE # These are passed to Diffy https://github.com/samg/diffy
38
+ include_diff_info: false # These are passed to Diffy https://github.com/samg/diffy
38
39
  }
39
- }
40
+ }
40
41
  ```
41
42
 
42
43
  ### Transforming the notebooks
@@ -51,7 +52,7 @@ Options:
51
52
 
52
53
  ```ruby
53
54
  @default_transform_options = {
54
- include_metadata: FALSE, # Whether to include or not the notebook metadata (kernel, language, etc)
55
+ include_metadata: false, # Whether to include the notebook metadata (kernel, language, etc)
55
56
  cell_decorator: :html # :html is useful to add styling with css, :percent is better for text format
56
57
  }
57
58
  ```
data/ipynbdiff.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'ipynbdiff'
5
- s.version = '0.3.4'
5
+ s.version = ENV['LIB_VERSION']
6
6
  s.summary = 'Human Readable diffs for Jupyter Notebooks'
7
7
  s.description = 'Better diff for Jupyter Notebooks by first preprocessing them and removing clutter'
8
8
  s.authors = ['Eduardo Bonet']
data/lib/ipynbdiff.rb CHANGED
@@ -6,48 +6,53 @@ module IpynbDiff
6
6
  require 'diffy'
7
7
 
8
8
  @default_transform_options = {
9
- include_metadata: FALSE,
9
+ include_metadata: false,
10
10
  cell_decorator: :html
11
11
  }
12
12
 
13
13
  @default_diff_options = {
14
- preprocess_input: TRUE,
14
+ preprocess_input: true,
15
15
  write_output_to: nil,
16
16
  format: :text,
17
- sources_are_files: FALSE,
17
+ sources_are_files: false,
18
+ raise_if_invalid_notebook: false,
18
19
  transform_options: @default_transform_options,
19
20
  diff_opts: {
20
- include_diff_info: FALSE
21
+ include_diff_info: false
21
22
  }
22
23
  }.freeze
23
24
 
24
25
  def self.prepare_input(to_prepare, options)
25
- prepared = to_prepare
26
- prepared = File.read(prepared) if options[:sources_are_files]
27
- prepared = transform(prepared, options[:transform_options]) if options[:preprocess_input]
26
+ return '' unless to_prepare
28
27
 
29
- prepared
28
+ prep = to_prepare
29
+ prep = File.read(prep) if options[:sources_are_files]
30
+ prep = transform(prep, raise_errors: true, options: options[:transform_options]) if options[:preprocess_input]
31
+ prep
30
32
  end
31
33
 
32
34
  def self.diff(
33
35
  from_notebook,
34
36
  to_notebook,
35
- options = {}
37
+ options = @default_diff_options
36
38
  )
37
39
  options = @default_diff_options.merge(options)
38
40
 
39
- from = from_notebook && prepare_input(from_notebook, options) || ''
40
- to = to_notebook && prepare_input(to_notebook, options) || ''
41
+ from = prepare_input(from_notebook, options)
42
+ to = prepare_input(to_notebook, options)
41
43
 
42
44
  d = Diffy::Diff.new(from, to, **options[:diff_opts]).to_s(options[:format])
43
-
44
45
  File.write(options[:write_output_to], d) if options[:write_output_to]
45
-
46
46
  d
47
+ rescue InvalidNotebookError
48
+ raise if options[:raise_if_invalid_notebook]
47
49
  end
48
50
 
49
- def self.transform(notebook, options)
51
+ def self.transform(notebook, raise_errors: false, options: @default_transform_options)
50
52
  options = @default_transform_options.merge(options)
53
+
51
54
  Transformer.new(**options).transform(notebook)
55
+ rescue InvalidNotebookError
56
+ raise if raise_errors
52
57
  end
53
58
  end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ module IpynbDiff
4
+
5
+ # Transforms Jupyter output data into markdown
6
+ class OutputTransformer
7
+
8
+ ORDERED_KEYS = {
9
+ 'execute_result' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex text/plain],
10
+ 'display_data' => %w[image/png image/svg+xml image/jpeg text/markdown text/latex]
11
+ }.freeze
12
+
13
+ def transform(output)
14
+ case (output_type = output['output_type'])
15
+ when 'error'
16
+ transform_error(output['traceback'])
17
+ when 'execute_result', 'display_data'
18
+ transform_non_error(ORDERED_KEYS[output_type], output['data'])
19
+ end
20
+ end
21
+
22
+ def transform_error(traceback)
23
+ traceback.map do |t|
24
+ t.split("\n").map do |line|
25
+ line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip << "\n"
26
+ end
27
+ end
28
+ end
29
+
30
+ def transform_non_error(accepted_keys, elements)
31
+ accepted_keys.map do |key|
32
+ transform_element(key, elements[key]) if elements.key?(key)
33
+ end.flatten
34
+ end
35
+
36
+ def transform_element(output_type, output_element)
37
+ case output_type
38
+ when 'image/png', 'image/jpeg'
39
+ transform_image(output_type, output_element)
40
+ when 'image/svg+xml'
41
+ transform_svg(output_element)
42
+ when 'text/markdown', 'text/latex', 'text/plain'
43
+ transform_text(output_element)
44
+ end
45
+ end
46
+
47
+ def transform_image(image_type, image_content)
48
+ [" ![](data:#{image_type};base64,#{image_content.gsub("\n", '')})", "\n"]
49
+ end
50
+
51
+ def transform_svg(image_content)
52
+ lines = image_content.is_a?(Array) ? image_content : [image_content]
53
+
54
+ single_line = lines.map(&:strip).join('').gsub(/\s+/, ' ')
55
+
56
+ [" ![](data:image/svg+xml;utf8,#{single_line})", "\n"]
57
+ end
58
+
59
+ def transform_text(text_content)
60
+ lines = text_content.is_a?(Array) ? text_content : [text_content]
61
+
62
+ lines.map { |line| " #{line}" }.append("\n")
63
+ end
64
+ end
65
+ end
data/lib/transformer.rb CHANGED
@@ -1,21 +1,37 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module IpynbDiff
4
+ class InvalidNotebookError < StandardError
5
+ end
6
+
4
7
  # Returns a markdown version of the Jupyter Notebook
5
8
  class Transformer
6
9
  require 'json'
7
10
  require 'yaml'
11
+ require 'output_transformer'
8
12
 
9
13
  @cell_decorator = :html
10
- @include_metadata = TRUE
14
+ @include_metadata = true
15
+
11
16
 
12
- def initialize(include_metadata: TRUE, cell_decorator: :html)
17
+ def initialize(include_metadata: true, cell_decorator: :html)
13
18
  @include_metadata = include_metadata
14
19
  @cell_decorator = cell_decorator
20
+ @output_transformer = OutputTransformer.new
15
21
  end
16
22
 
17
- def transform(notebook)
23
+ def validate_notebook(notebook)
18
24
  notebook_json = JSON.parse(notebook)
25
+
26
+ return notebook_json if notebook_json.key?('cells')
27
+
28
+ raise InvalidNotebookError
29
+ rescue JSON::ParserError
30
+ raise InvalidNotebookError
31
+ end
32
+
33
+ def transform(notebook)
34
+ notebook_json = validate_notebook(notebook)
19
35
  transformed_blocks = notebook_json['cells'].map do |cell|
20
36
  decorate_cell(transform_cell(cell, notebook_json), cell)
21
37
  end
@@ -26,7 +42,7 @@ module IpynbDiff
26
42
 
27
43
  def decorate_cell(rows, cell)
28
44
  tags = cell['metadata']&.fetch('tags', [])
29
- type = cell['cell_type']
45
+ type = cell['cell_type'] || 'raw'
30
46
 
31
47
  case @cell_decorator
32
48
  when :html
@@ -53,51 +69,22 @@ module IpynbDiff
53
69
 
54
70
  def transform_code_cell(cell, notebook)
55
71
  [
56
- %(``` #{notebook['metadata']['kernelspec']['language']}\n),
72
+ %(``` #{notebook.dig('metadata', 'kernelspec', 'language') || ''}\n),
57
73
  *cell['source'],
58
74
  "\n```\n",
59
75
  *cell['outputs'].map { |output| transform_output(output) }
60
76
  ]
61
77
  end
62
78
 
63
- def format_traceback(traceback)
64
- traceback.map do |t|
65
- t.split("\n").map do |line|
66
- line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", ' ').gsub(/\u001B/, '').rstrip << "\n"
67
- end
68
- end
69
- end
70
-
71
- def transform_execute_result(output)
72
- output['data']['text/plain'].map { |line| " #{line}" }.append("\n")
73
- end
74
-
75
- def transform_image_result(output)
76
- if output['data'].key?('image/png')
77
- ["![](data:image/png;base64,#{output['data']['image/png'].gsub("\n", '')})", "\n"]
78
- end
79
- end
80
-
81
- def transform_error_result(output)
82
- format_traceback(output['traceback'])
83
- end
84
-
85
79
  def transform_output(output)
86
- transformed =
87
- case output['output_type']
88
- when 'execute_result'
89
- transform_execute_result(output)
90
- when 'display_data'
91
- transform_image_result(output)
92
- when 'error'
93
- transform_error_result(output)
94
- end
80
+ transformed = @output_transformer.transform(output)
95
81
 
96
82
  decorate_output(transformed, output).join('') if transformed
97
83
  end
98
84
 
99
85
  def transform_text_cell(cell)
100
- cell['source'].append("\n")
86
+ source = cell['source']
87
+ (source.is_a?(Array) ? source : [source]).append("\n")
101
88
  end
102
89
 
103
90
  def transform_metadata(notebook_json)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ipynbdiff
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: 0.3.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eduardo Bonet
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-18 00:00:00.000000000 Z
11
+ date: 2021-11-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: diffy
@@ -116,11 +116,13 @@ extensions: []
116
116
  extra_rdoc_files: []
117
117
  files:
118
118
  - ".gitignore"
119
+ - ".gitlab-ci.yml"
119
120
  - Gemfile
120
121
  - Gemfile.lock
121
122
  - README.md
122
123
  - ipynbdiff.gemspec
123
124
  - lib/ipynbdiff.rb
125
+ - lib/output_transformer.rb
124
126
  - lib/transformer.rb
125
127
  homepage: https://gitlab.com/gitlab-org/incubation-engineering/mlops/rb-ipynbdiff
126
128
  licenses: