ruby_pymill 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +12 -0
- data/README.md +106 -0
- data/bin/ruby_pymill +54 -0
- data/lib/ruby_pymill/api.rb +59 -0
- data/lib/ruby_pymill/version.rb +4 -0
- data/lib/ruby_pymill.rb +86 -0
- metadata +64 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 4c88e00a96d006073f8007aa6395eea1ecb5ccecf530430e9e73b288cb1df18d
|
|
4
|
+
data.tar.gz: da982b4b979d75075f5d1ce914feb8e79f0844441f18f4d1f2dd2b34e4502517
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: bdb19766137d960b14a59513b522e65ea11147c940e708969a7a63a19500920c4880d341bcdb694eed013a8f92a6eaaee546cefd6d293d9fb6cf77ab620eabec
|
|
7
|
+
data.tar.gz: 7d263d39baae7f1055134014f2a6babeb76a931ac3bf51b6bcb47cc053770ec224e6d71701abc337e8647a9ecf08124dba30e1f49d4ac35be68a51f6c116b416
|
data/LICENSE
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
|
data/README.md
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# RubyPyMill
|
|
2
|
+
Running Notebooks the Ruby Way — RubyPyMill and the Art of PoC Automation
|
|
3
|
+
|
|
4
|
+
## Background and Purpose
|
|
5
|
+
RubyPyMill is a lightweight runner and automation starter that allows Ruby
|
|
6
|
+
to control Papermill (a Python Notebook runner).
|
|
7
|
+
|
|
8
|
+
Its purpose is to bridge insights born from Proof of Concept (PoC) work
|
|
9
|
+
into real-world systems.
|
|
10
|
+
|
|
11
|
+
PoC is not the end — it is the beginning of organizational knowledge circulation.
|
|
12
|
+
|
|
13
|
+
By connecting Ruby’s expressive power with Python’s execution ecosystem,
|
|
14
|
+
RubyPyMill enables a development cycle where teams collaborate with data
|
|
15
|
+
in a Ruby-native way.
|
|
16
|
+
|
|
17
|
+
## Design Philosophy — Inspired by Ruby 4.0 @30
|
|
18
|
+
RubyPyMill follows Ruby 4.0’s philosophy of “multi-language collaboration”.
|
|
19
|
+
|
|
20
|
+
- Ruby is responsible for DSLs, orchestration, and control.
|
|
21
|
+
- Python is responsible for execution, computation, and visualization via notebooks.
|
|
22
|
+
|
|
23
|
+
By clearly separating these roles and bridging them automatically,
|
|
24
|
+
RubyPyMill enables reproducible notebook execution from the Ruby ecosystem.
|
|
25
|
+
|
|
26
|
+
“Ruby aims to connect people with people, and tools with tools.”
|
|
27
|
+
— Yukihiro “Matz” Matsumoto
|
|
28
|
+
|
|
29
|
+
## Project Structure
|
|
30
|
+
| Directory | Description |
|
|
31
|
+
|----------|-------------|
|
|
32
|
+
| .vscode/ | VS Code settings (extensions, lint/format, tasks, debug) |
|
|
33
|
+
| .github/workflows/ | CI for Ruby and Python |
|
|
34
|
+
| bin/ | CLI entry point (`ruby_pymill`) |
|
|
35
|
+
| lib/ | RubyPyMill core library |
|
|
36
|
+
| py/ | Python-side environment (Papermill execution) |
|
|
37
|
+
| examples/ | Example notebooks, parameters, and outputs |
|
|
38
|
+
|
|
39
|
+
## Setup
|
|
40
|
+
|
|
41
|
+
### Ruby
|
|
42
|
+
```bash
|
|
43
|
+
bundle install
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Python
|
|
47
|
+
```bash
|
|
48
|
+
python -m venv .venv
|
|
49
|
+
|
|
50
|
+
# macOS / Linux
|
|
51
|
+
source .venv/bin/activate
|
|
52
|
+
|
|
53
|
+
# Windows (PowerShell)
|
|
54
|
+
.\.venv\Scripts\activate
|
|
55
|
+
|
|
56
|
+
pip install -r py/requirements.txt
|
|
57
|
+
python -m ipykernel install --user --name rpymill
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Basic Usage (CLI)
|
|
61
|
+
```bash
|
|
62
|
+
bundle exec ruby bin/ruby_pymill exec <input.ipynb> \
|
|
63
|
+
--output <output.ipynb> \
|
|
64
|
+
[--kernel rpymill] \
|
|
65
|
+
[--params params.json] \
|
|
66
|
+
[--cell_tags "parameters,setup,analysis"] \
|
|
67
|
+
[--dry-run]
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Processing Overview
|
|
71
|
+
1. Load the notebook as JSON.
|
|
72
|
+
2. Filter cells by specified tags (cells tagged `parameters` or `injected-parameters` are always preserved).
|
|
73
|
+
3. Generate a temporary filtered notebook.
|
|
74
|
+
4. Execute the filtered notebook once using Papermill.
|
|
75
|
+
5. Save the executed result as an output notebook.
|
|
76
|
+
|
|
77
|
+
RubyPyMill acts as a higher-level orchestration layer on top of Papermill,
|
|
78
|
+
ensuring logical structure, reproducibility, and Ruby-friendly control.
|
|
79
|
+
|
|
80
|
+
## Example: Ruby vs Python Radar Chart
|
|
81
|
+
The `examples/` directory includes a notebook that compares Ruby and Python strengths
|
|
82
|
+
using a radar chart.
|
|
83
|
+
|
|
84
|
+
This example demonstrates:
|
|
85
|
+
- Parameter injection via JSON
|
|
86
|
+
- Tag-based execution control
|
|
87
|
+
- Separation of preview and output generation
|
|
88
|
+
- Reproducible notebook execution via RubyPyMill
|
|
89
|
+
|
|
90
|
+
For a detailed explanation in Japanese, see `README.jp.md`.
|
|
91
|
+
|
|
92
|
+
## Programmatic Usage (Experimental)
|
|
93
|
+
RubyPyMill is primarily designed as a CLI tool.
|
|
94
|
+
|
|
95
|
+
Internally, it exposes a Ruby execution API (`RubyPyMill::API`),
|
|
96
|
+
which lets Ruby code trigger a run by invoking the `ruby_pymill` CLI as a subprocess.
|
|
97
|
+
|
|
98
|
+
This enables integration with batch jobs, schedulers,
|
|
99
|
+
or future web APIs.
|
|
100
|
+
|
|
101
|
+
The CLI is considered the stable interface.
|
|
102
|
+
The Ruby API is experimental and may change.
|
|
103
|
+
|
|
104
|
+
## License
|
|
105
|
+
MIT License
|
|
106
|
+
Copyright (c) 2025 Hiroshi Inoue / OSS-Vision
|
data/bin/ruby_pymill
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Add lib/ to the load path
|
|
5
|
+
$LOAD_PATH.unshift(File.expand_path("../lib", __dir__))
|
|
6
|
+
|
|
7
|
+
require "thor"
|
|
8
|
+
require "ruby_pymill"
|
|
9
|
+
require "shellwords"
|
|
10
|
+
|
|
11
|
+
# Thor-based command-line front end for RubyPyMill::Runner.
#
# Commands:
#   exec INPUT_IPYNB  - run a notebook through Papermill (also reachable as `run`)
#   version           - print RubyPyMill::VERSION
class RubyPyMillCLI < Thor
  # Tell Thor to exit with a non-zero status on usage errors (unknown command,
  # missing required option, ...). Without this definition Thor prints a
  # deprecation warning and exits with status 0 on such errors.
  def self.exit_on_failure?
    true
  end

  # `run` is a reserved word in Thor; forward a user-typed `run` to :exec.
  map "run" => :exec

  desc "exec INPUT_IPYNB", "Run papermill with Ruby wrapper"
  method_option :output, type: :string, aliases: "-o", required: true, desc: "Output notebook path"
  method_option :kernel, type: :string, default: "python3", desc: "Jupyter kernel name"
  method_option :params, type: :string, desc: "Path to params.json or JSON string"
  method_option :cwd, type: :string, desc: "Working directory"
  method_option :dry_run, type: :boolean, default: false, desc: "Print command only"
  # Tag-based execution
  method_option :cell_tag, type: :string, desc: "Execute only cells with this tag (Papermill --cell_tag)"
  method_option :cell_tags, type: :array, desc: "Execute only cells with these tags (space/comma-separated)"

  # Runs INPUT_IPYNB via RubyPyMill::Runner and exits the process with
  # status 0 when the runner reports success, 1 otherwise.
  def exec(input_ipynb)
    # Merge --cell-tag and --cell-tags into one list, tolerating nil values,
    # empty entries and comma-separated items.
    tags = []
    tags << options[:cell_tag] if options[:cell_tag]
    tags.concat(options[:cell_tags]) if options[:cell_tags]
    cell_tags = tags.compact.flat_map { |t| t.to_s.split(",") }.map(&:strip).reject(&:empty?)

    runner = RubyPyMill::Runner.new(
      kernel: options[:kernel],
      cwd: options[:cwd],
      cell_tags: cell_tags
    )
    ok = runner.run(
      input_ipynb: input_ipynb,
      output_ipynb: options[:output],
      params_json: options[:params],
      dry_run: options[:dry_run]
    )
    exit(ok ? 0 : 1)
  end

  desc "version", "Show RubyPyMill version"
  def version
    puts RubyPyMill::VERSION
  end
end

RubyPyMillCLI.start(ARGV)
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# lib/ruby_pymill/api.rb
|
|
2
|
+
require "open3"
|
|
3
|
+
|
|
4
|
+
module RubyPyMill
  module API
    # Runs a notebook from Ruby by spawning the `ruby_pymill exec` CLI as a
    # subprocess (the executable must be resolvable via PATH).
    #
    # Example:
    #   RubyPyMill::API.run(
    #     notebook:  "demo/notebooks/xxx.ipynb",
    #     output:    "demo/outputs/out.ipynb",
    #     kernel:    "rpymill",
    #     cell_tags: "setup,preprocess,report",
    #     params:    "demo/params/kodama.json",
    #     log:       "demo/logs/run_xxx.log"
    #   )
    #
    # @param notebook  [String] input notebook path
    # @param output    [String] output notebook path (passed as --output)
    # @param kernel    [String] kernel name (passed as --kernel)
    # @param cell_tags [String, Array<String>, nil] comma-separated string or
    #   array of tags; passed as a single comma-joined --cell-tag value
    # @param params    [String, nil] value passed as --params
    # @param log       [String, nil] when given, the combined stdout/stderr of
    #   the subprocess is written to this file (parent directories are created)
    # @return [String] combined stdout/stderr of the subprocess
    # @raise [RuntimeError] when the subprocess does not exit successfully
    def self.run(
      notebook:,
      output:,
      kernel: "rpymill",
      cell_tags: nil,
      params: nil,
      log: nil
    )
      cmd = [
        "ruby_pymill", "exec",
        notebook,
        "--output", output,
        "--kernel", kernel
      ]

      # Accept both "a,b" and ["a", "b"]: every argv element must be a String,
      # so an Array is flattened into one comma-separated value.
      tag_arg = Array(cell_tags).map(&:to_s).reject(&:empty?).join(",")
      cmd += ["--cell-tag", tag_arg] unless tag_arg.empty?
      cmd += ["--params", params] if params && !params.empty?

      captured = +""
      status = nil

      Open3.popen2e(*cmd) do |stdin, stdout_err, wait_thr|
        # Nothing is ever written to the child; close the pipe so a child that
        # reads stdin sees EOF instead of blocking.
        stdin.close
        stdout_err.each do |line|
          print line        # echo to the console
          captured << line  # and keep for the log / return value
        end
        status = wait_thr.value
      end

      if log
        require "fileutils"
        # mkdir_p creates missing intermediate directories and is a no-op when
        # the directory already exists.
        FileUtils.mkdir_p(File.dirname(log))
        File.write(log, captured)
      end

      unless status&.success?
        # exitstatus is nil when the child was killed by a signal; status
        # itself is guarded so building the message can never raise.
        code = status ? status.exitstatus : nil
        raise "ruby_pymill failed (status=#{code.inspect})"
      end

      captured
    end
  end
end
|
data/lib/ruby_pymill.rb
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "ruby_pymill/version"
|
|
4
|
+
require_relative "ruby_pymill/api"
|
|
5
|
+
require "json"
|
|
6
|
+
require "open3"
|
|
7
|
+
require "tmpdir"
|
|
8
|
+
|
|
9
|
+
# Needed by Runner below: Shellwords for an unambiguous log line, FileUtils
# for removing the temporary directories created per run.
require "shellwords"
require "fileutils"

module RubyPyMill
  # Builds and executes a `papermill` command line, optionally pre-filtering
  # the input notebook down to cells carrying selected tags.
  class Runner
    # Cells carrying one of these tags are always kept when filtering.
    PARAMETER_TAGS = %w[parameters injected-parameters].freeze
    private_constant :PARAMETER_TAGS

    # @param kernel    [String] default kernel name passed to papermill via -k
    # @param cwd       [String, nil] working directory for papermill
    #   (the current directory when nil)
    # @param logger    [#puts] receives the command line and papermill output
    # @param cell_tags [String, Array<String>] default tag filter; a
    #   comma/whitespace separated string or an array
    def initialize(kernel: "rpymill", cwd: nil, logger: $stdout, cell_tags: [])
      @kernel = kernel
      @cwd = cwd
      @logger = logger
      @cell_tags = normalize_tags(cell_tags)
    end

    # Runs papermill once on the (optionally tag-filtered) notebook.
    #
    # @param input_ipynb  [String] input notebook path
    # @param output_ipynb [String] output notebook path
    # @param params_json  [String, nil] path to an existing parameters file, or
    #   the parameters content itself (written to a temporary file)
    # @param kernel       [String, nil] overrides the kernel given to #initialize
    # @param dry_run      [Boolean] when true, only log the command
    # @param cell_tags    [String, Array<String>, nil] overrides the tags given
    #   to #initialize (comma/whitespace separated string or array)
    # @return [Boolean] true for a dry run, otherwise whether papermill exited
    #   successfully
    def run(input_ipynb:, output_ipynb:, params_json: nil, kernel: nil, dry_run: false, cell_tags: nil)
      scratch_dirs = [] # temporary directories created for this run
      kernel_name = kernel || @kernel
      tags = normalize_tags(cell_tags.nil? ? @cell_tags : cell_tags)

      # 1) Pre-filter the notebook when tags were requested.
      notebook = input_ipynb
      unless tags.empty?
        notebook = filter_by_tags(input_ipynb, tags)
        scratch_dirs << File.dirname(notebook)
      end

      # 2) Assemble the papermill argument vector.
      argv = ["papermill", notebook, output_ipynb, "-k", kernel_name]

      if params_json
        params_file = params_json
        unless File.exist?(params_json)
          # Not an existing file: treat the value as inline content.
          dir = Dir.mktmpdir("rpymill_params")
          scratch_dirs << dir
          params_file = File.join(dir, "params.json")
          File.write(params_file, params_json)
        end
        argv += ["-f", params_file]
      end

      argv = argv.map(&:to_s)

      # Shell-escaped so the logged line stays unambiguous (and copy-pasteable)
      # when a path contains spaces or other special characters.
      @logger.puts "[RubyPyMill] #{dry_run ? 'DRY' : 'RUN'}: #{Shellwords.join(argv)}"
      return true if dry_run

      # Pass the arguments as a list so no shell is involved: paths with
      # spaces stay intact and shell metacharacters are never interpreted.
      stdout_str, stderr_str, status = Open3.capture3(*argv, chdir: @cwd || Dir.pwd)
      @logger.puts stdout_str unless stdout_str.empty?
      @logger.puts stderr_str unless stderr_str.empty?
      status.success?
    ensure
      # Temporary files are kept after a dry run so the printed command still
      # refers to existing files; after a real run they are no longer needed.
      unless dry_run
        Array(scratch_dirs).each { |d| FileUtils.remove_entry(d) if File.directory?(d) }
      end
    end

    private

    # Accepts "preprocess, analysis", "preprocess analysis" or
    # ["preprocess", "analysis"] and returns a de-duplicated array of
    # non-empty tag strings.
    def normalize_tags(value)
      Array(value)
        .flat_map { |v| v.to_s.split(/[,\s]+/) }
        .map(&:strip)
        .reject(&:empty?)
        .uniq
    end

    # Writes a copy of the notebook containing only the cells that carry at
    # least one of +tags+ (OR match) or one of PARAMETER_TAGS, and returns the
    # path of that copy inside a fresh temporary directory.
    def filter_by_tags(ipynb_path, tags)
      # Notebook files are UTF-8 JSON; read them as such regardless of the
      # platform's default external encoding.
      notebook = JSON.parse(File.read(ipynb_path, encoding: "UTF-8"))
      wanted = tags + PARAMETER_TAGS

      kept = (notebook["cells"] || []).select do |cell|
        cell_tags = Array(cell.dig("metadata", "tags")).map(&:to_s)
        (cell_tags & wanted).any?
      end

      filtered = notebook.merge("cells" => kept)

      tmpdir = Dir.mktmpdir("rpymill_nb")
      tmpnb = File.join(tmpdir, File.basename(ipynb_path, ".ipynb") + ".filtered.ipynb")
      File.write(tmpnb, JSON.pretty_generate(filtered))
      tmpnb
    end
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: ruby_pymill
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.2.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Hiroshi Inoue
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2026-01-10 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: json
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - ">="
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '2.0'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - ">="
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '2.0'
|
|
27
|
+
description: Ruby orchestrator to filter/execute tagged Jupyter cells via Papermill.
|
|
28
|
+
email:
|
|
29
|
+
- hiroshi.inoue@gmail.com
|
|
30
|
+
executables:
|
|
31
|
+
- ruby_pymill
|
|
32
|
+
extensions: []
|
|
33
|
+
extra_rdoc_files: []
|
|
34
|
+
files:
|
|
35
|
+
- LICENSE
|
|
36
|
+
- README.md
|
|
37
|
+
- bin/ruby_pymill
|
|
38
|
+
- lib/ruby_pymill.rb
|
|
39
|
+
- lib/ruby_pymill/api.rb
|
|
40
|
+
- lib/ruby_pymill/version.rb
|
|
41
|
+
homepage: https://github.com/inoue-0852/RubyPyMill
|
|
42
|
+
licenses:
|
|
43
|
+
- MIT
|
|
44
|
+
metadata: {}
|
|
45
|
+
post_install_message:
|
|
46
|
+
rdoc_options: []
|
|
47
|
+
require_paths:
|
|
48
|
+
- lib
|
|
49
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - ">="
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: '3.0'
|
|
54
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
55
|
+
requirements:
|
|
56
|
+
- - ">="
|
|
57
|
+
- !ruby/object:Gem::Version
|
|
58
|
+
version: '0'
|
|
59
|
+
requirements: []
|
|
60
|
+
rubygems_version: 3.5.11
|
|
61
|
+
signing_key:
|
|
62
|
+
specification_version: 4
|
|
63
|
+
summary: Run Jupyter Notebooks from Ruby using Papermill
|
|
64
|
+
test_files: []
|