RubyGems - ipynbdiff - Versions diffs - 0.3.3 → 0.3.4 - Mend

ipynbdiff 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 80e13b121205b2811ffb5d508e0978284562ced1b359156530b32ec2e10de6d4
-  data.tar.gz: 9a64f732b7e2ca0143bab893eb8e7713761f533d79798935d137d88d58f401e6
+  metadata.gz: d87c49e29a069b5064985b1b1097ac4d02c375161d9fe64df8e70f0155419220
+  data.tar.gz: 97931332962139f7f129a546bb7a7f9b670d779dcb032b27fbb1a3faa6f7dc85
 SHA512:
-  metadata.gz: 27f12477595add7a90f8f8d3ebf3b46b32ae24ec5c25a9417b03ff52d0be47ca68db501a2feca3337a0e8cf8ccee257cceccb7b9cbbf6716680641fb00a6a7f7
-  data.tar.gz: b1c3cd5e12a5f949be5e14c1dd2b69b93304b4b507787c91f8d7cc383855bcf52dabf2edbfe4a67e4410e20d575686f71dbfe5d2b7b4853678acf978555c4cd4
+  metadata.gz: 86c4be0fc53e0db2172221a17dd07e0c3f37492ffc699075b0de74dea3265feebee2207ef4e9170ead59be15c42b858918fbd2e56c7ed109cb674fbf7eb7dbbb
+  data.tar.gz: 10fdb124114a73425edc57f5c9fb80fbd546f6b8e7d58929a327f1be268774d8409238630286aa050e41384bf53c951d2b8c67aaeb76835bc3e0f37672775d48

data/README.md CHANGED Viewed

@@ -1,3 +1,57 @@
-# rb-ipynbdiff: Better Jupyter Notebook diffs, in Ruby
+# IpynbDiff: Better diff for Jupyter Notebooks
-This is a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff)
+This is a simple diff tool that cleans up jupyter notebooks, transforming each [notebook](example/1/from.ipynb)
+into a [readable markdown file](example/1/from_html.md), keeping the output of cells, and running the
+diff after. Markdowns are generated using an opinionated Jupyter to Markdown conversion. This means
+that the entire file is readable on the diff.
+The results are diffs that are much easier to read:
+| Diff | IpynbDiff - HTML | IpynbDiff - Percent |
+| ------ | ------ | ------ |
+| [Here](example/diff.txt) | [Here](example/ipynbdiff.txt) | [Here](example/ipynbdiff_percent.txt) |
+| ![](example/img/diff.png) | ![](example/img/ipynbdiff_html.png) | ![](example/img/ipynbdiff_percent.png) |
+This started as a port of [ipynbdiff](https://gitlab.com/gitlab-org/incubation-engineering/mlops/ipynbdiff),
+but now has extended functionality, although it does not work as a git driver.
+## Usage
+### Generating diffs
+```ruby
+IpynbDiff.diff(from_path, to_path, options)
+```
+Options:
+```ruby
+@default_diff_options = {
+  preprocess_input: TRUE, # Whether the input should be transformed
+  write_output_to: nil, # Pass a path to save the output to a file
+  format: :text, # These are the formats Diffy accepts https://github.com/samg/diffy
+  sources_are_files: FALSE, # Whether from/to are paths to files (TRUE) or the notebook contents as strings (FALSE)
+  transform_options: @default_transform_options, # See below for transform options
+  diff_opts: {
+    include_diff_info: FALSE # These are passed to Diffy https://github.com/samg/diffy
+  }
+}
+```
+### Transforming the notebooks
+It might be necessary to have the transformed files in addition to the diff.
+```ruby
+IpynbDiff.transform(notebook, options)
+```
+Options:
+```ruby
+@default_transform_options = {
+    include_metadata: FALSE, # Whether to include the notebook metadata (kernel, language, etc)
+    cell_decorator: :html # :html is useful to add styling with css, :percent is better for text format
+}
+```

data/ipynbdiff.gemspec CHANGED Viewed

@@ -2,7 +2,7 @@
 Gem::Specification.new do |s|
   s.name        = 'ipynbdiff'
-  s.version     = '0.3.3'
+  s.version     = '0.3.4'
   s.summary     = 'Human Readable diffs for Jupyter Notebooks'
   s.description = 'Better diff for Jupyter Notebooks by first preprocessing them and removing clutter'
   s.authors     = ['Eduardo Bonet']

data/lib/ipynbdiff.rb CHANGED Viewed

@@ -2,15 +2,20 @@
 # Human Readable Jupyter Diffs
 module IpynbDiff
-  require 'transformer.rb'
+  require 'transformer'
   require 'diffy'
-  @default_options = {
+  @default_transform_options = {
+    include_metadata: FALSE,
+    cell_decorator: :html
+  }
+  @default_diff_options = {
     preprocess_input: TRUE,
     write_output_to: nil,
     format: :text,
     sources_are_files: FALSE,
-    include_metadata: TRUE,
+    transform_options: @default_transform_options,
     diff_opts: {
       include_diff_info: FALSE
     }
@@ -19,7 +24,7 @@ module IpynbDiff
   def self.prepare_input(to_prepare, options)
     prepared = to_prepare
     prepared = File.read(prepared) if options[:sources_are_files]
-    prepared = Transformer.transform(prepared, include_metadata: options[:include_metadata]) if options[:preprocess_input]
+    prepared = transform(prepared, options[:transform_options]) if options[:preprocess_input]
     prepared
   end
@@ -29,7 +34,7 @@ module IpynbDiff
     to_notebook,
     options = {}
   )
-    options = @default_options.merge(options)
+    options = @default_diff_options.merge(options)
     from = from_notebook && prepare_input(from_notebook, options) || ''
     to = to_notebook && prepare_input(to_notebook, options) || ''
@@ -40,4 +45,9 @@ module IpynbDiff
     d
   end
+  def self.transform(notebook, options)
+    options = @default_transform_options.merge(options)
+    Transformer.new(**options).transform(notebook)
+  end
 end

data/lib/transformer.rb CHANGED Viewed

@@ -6,88 +6,101 @@ module IpynbDiff
     require 'json'
     require 'yaml'
-    def self.transform(notebook, include_metadata: TRUE)
+    @cell_decorator = :html
+    @include_metadata = TRUE
+    def initialize(include_metadata: TRUE, cell_decorator: :html)
+      @include_metadata = include_metadata
+      @cell_decorator = cell_decorator
+    end
+    def transform(notebook)
       notebook_json = JSON.parse(notebook)
-      transformed_blocks = notebook_json['cells'].map { |cell| transform_cell(cell, notebook_json) }
-      transformed_blocks.prepend(transform_metadata(notebook_json)) if include_metadata
+      transformed_blocks = notebook_json['cells'].map do |cell|
+        decorate_cell(transform_cell(cell, notebook_json), cell)
+      end
+      transformed_blocks.prepend(transform_metadata(notebook_json)) if @include_metadata
       transformed_blocks.join("\n")
     end
-    def self.transform_cell(cell, notebook)
+    def decorate_cell(rows, cell)
+      tags = cell['metadata']&.fetch('tags', [])
+      type = cell['cell_type']
+      case @cell_decorator
+      when :html
+        rows.prepend(%(<div class="cell #{type}" data-id="#{cell['id']}" data-tags="#{tags&.join(' ')}">\n\n))
+            .append("\n</div>\n")
+      when :percent
+        rows.prepend(%(%% Cell type:#{type} id:#{cell['id']} tags:#{tags&.join(',')}\n\n))
+      else
+        rows
+      end.join('')
+    end
+    def transform_cell(cell, notebook)
       cell['cell_type'] == 'code' ? transform_code_cell(cell, notebook) : transform_text_cell(cell)
     end
-    def self.transform_code_cell(cell, notebook)
-      tags = cell['metadata']&.fetch('tags', [])&.join(' ') || ''
+    def decorate_output(output_rows, output)
+      if @cell_decorator == :html
+        output_rows.prepend(%(\n<div class="output #{output['output_type']}">\n\n)).append("\n</div>\n")
+      else
+        output_rows.prepend(%(\n%%%% Output: #{output['output_type']}\n\n))
+      end
+    end
+    def transform_code_cell(cell, notebook)
       [
-        %(<div class="cell code" data-id="#{cell['id']}" data-tags="#{tags}">\n\n),
         %(``` #{notebook['metadata']['kernelspec']['language']}\n),
         *cell['source'],
         "\n```\n",
-        *cell['outputs'].map { |output| transform_output(output) },
-        "\n</div>\n"
-      ].join('')
+        *cell['outputs'].map { |output| transform_output(output) }
+      ]
     end
-    def self.format_traceback(traceback)
+    def format_traceback(traceback)
       traceback.map do |t|
         t.split("\n").map do |line|
-          line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", '    ').gsub(/\u001B/, '').rstrip
+          line.gsub(/\[[0-9][0-9;]*m/, '').sub("\u001B", '    ').gsub(/\u001B/, '').rstrip << "\n"
         end
-      end.join("\n")
+      end
     end
-    def self.transform_execute_result(output)
-      [
-        %(\n<div class="output execute_result">\n\n),
-        *output['data']['text/plain'].map { |line| "    #{line}" },
-        "\n\n</div>\n"
-      ].join('')
+    def transform_execute_result(output)
+      output['data']['text/plain'].map { |line| "    #{line}" }.append("\n")
     end
-    def self.transform_image_result(output)
+    def transform_image_result(output)
       if output['data'].key?('image/png')
-        [
-          %(\n<div class="output display_data">\n\n),
-          "![](data:image/png;base64,#{output['data']['image/png'].gsub("\n", '')})",
-          "\n\n</div>\n"
-        ].join('')
+        ["![](data:image/png;base64,#{output['data']['image/png'].gsub("\n", '')})", "\n"]
       end
     end
-    def self.transform_error_result(output)
-      [
-        %(\n<div class="output error">\n\n),
-        format_traceback(output['traceback']),
-        "\n\n</div>\n"
-      ].join('')
+    def transform_error_result(output)
+      format_traceback(output['traceback'])
     end
-    def self.transform_output(output)
-      case output['output_type']
-      when 'execute_result'
-        transform_execute_result(output)
-      when 'display_data'
-        transform_image_result(output)
-      when 'error'
-        transform_error_result(output)
-      end
-    end
+    def transform_output(output)
+      transformed =
+        case output['output_type']
+        when 'execute_result'
+          transform_execute_result(output)
+        when 'display_data'
+          transform_image_result(output)
+        when 'error'
+          transform_error_result(output)
+        end
-    def self.transform_text_cell(cell)
-      tags = cell['metadata']&.fetch('tags', [])&.join(' ') || ''
-      id = cell['id']
-      cell_type = cell['cell_type']
+      decorate_output(transformed, output).join('') if transformed
+    end
-      [
-        %(<div class="cell #{cell_type}" data-id="#{id}" data-tags="#{tags}">\n\n),
-        *cell['source'],
-        "\n\n</div>\n"
-      ].join('')
+    def transform_text_cell(cell)
+      cell['source'].append("\n")
     end
-    def self.transform_metadata(notebook_json)
+    def transform_metadata(notebook_json)
       {
         'jupyter' => {
           'kernelspec' => notebook_json['metadata']['kernelspec'],

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ipynbdiff
 version: !ruby/object:Gem::Version
-  version: 0.3.3
+  version: 0.3.4
 platform: ruby
 authors:
 - Eduardo Bonet
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-10-13 00:00:00.000000000 Z
+date: 2021-10-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: diffy