atlas2ipynb 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YTg5MzhiYWI1NzVjODhiNzgyZjQ4OTY2NDc0NjA0ZjM5YTY5ZTJkNg==
5
+ data.tar.gz: !binary |-
6
+ Y2U5MTJhNmMwNTE3NTM0ZmRkZDlhMzhhMjg2NzMzOGRmZDA5NTJmMg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ N2VmMjBmYzA2OTNhZjhiNDdkMjliN2E5OTljMGU3NzMyNDUwNDJiNjMxMmRi
10
+ ZTEzNWNkNzk5MTFmY2I2YzY0NWQ4OTZhNDYyZTg1NjQ0ODlkNTRmY2ZhMDFi
11
+ ODg3ZTBjYjA1Nzg2NDE4Yjg1NDFmZWEzZWMyYWY3ODRiZDIzOWE=
12
+ data.tar.gz: !binary |-
13
+ ZjA2YTMwNjU4ODU4NGYzN2IxNTY5OTkxMzM5YjFiYmFhNWRhNjRiZWRmOGQ2
14
+ YzA1MzhhZTM2MDZhMTY1NDUzMTBlNmFmMmViMjQyZjRmMjQ4MDIwNWNjZjY2
15
+ NDBjNzE2MjkxZTdlOTIzZTMxNzBlNjY0YmY5ZjVhNTVlYTY1Yjg=
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in atlas2ipynb.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Andrew Odewahn
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Atlas2ipynb
2
+
3
+ Converts the `ch*.html` chapter files from an Atlas HTML build into IPython Notebooks (`.ipynb`).
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'atlas2ipynb'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install atlas2ipynb
18
+
19
+ ## Usage
20
+
21
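+ Run `atlas2ipynb` from the directory that contains the Atlas `ch*.html` build files. Each chapter is written to a `<chapter>_<title>.ipynb` notebook in that same directory.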
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it ( http://github.com/<my-github-username>/atlas2ipynb/fork )
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,28 @@
1
# coding: utf-8
#
# Gem specification for atlas2ipynb.
#
# The gem's lib/ directory is put on the load path first so that code under it
# can be required while the specification is being evaluated.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

Gem::Specification.new do |spec|
  spec.name          = "atlas2ipynb"
  spec.version       = "0.0.1"
  spec.authors       = ["Andrew Odewahn"]
  spec.email         = ["odewahn@oreilly.com"]
  spec.summary       = %q{Converts Atlas HTML build into IPython Notebooks.}
  spec.description   = %q{Converts all ch*.html files from Atlas into IPython Notebooks.}
  spec.homepage      = "https://github.com/odewahn/atlas2ipynb"
  spec.license       = "MIT"

  # The packaged file list is whatever git tracks; NUL-separated output keeps
  # file names containing spaces or newlines intact.
  spec.files         = `git ls-files -z`.split("\x0")
  # Every tracked file under bin/ is installed as an executable.
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.5"
  spec.add_development_dependency "rake"

  spec.add_dependency "nokogiri", "~> 1.6"
  spec.add_dependency "json", "~> 1.8"
  # NOTE(review): the library requires 'active_support/inflector'; the Rails
  # support library is normally published as "activesupport" -- confirm that
  # "active_support" is the intended gem name. The '>= 3.0.0' constraint is
  # already implied by '~> 3.0'.
  spec.add_dependency "active_support", '~> 3.0', '>= 3.0.0'
  spec.add_dependency "i18n"
end
data/bin/atlas2ipynb ADDED
@@ -0,0 +1,9 @@
1
#!/usr/bin/env ruby
# encoding: UTF-8

# Command-line entry point: builds a converter and runs it against the
# current working directory.
require 'rubygems'
require 'atlas2ipynb'

Atlas2ipynb::Converter.new.convert!
@@ -0,0 +1,140 @@
1
+ require 'nokogiri'
2
+ require 'json'
3
+ require 'i18n'
4
+ require 'active_support/inflector'
5
+
6
+ # If running in irb, don't forget to do this:
7
+ # require 'bundler'
8
+ # Bundler.require
9
+ #
10
+ module Atlas2ipynb
11
+
12
+ class Converter
13
+
14
+ #*************************************************************************************
15
+ # ipynb uses the plain filename as the user's index page, so we need to make something
16
+ # a name that is "meaningful". To do this, I turn the <h1> from the first section, which
17
+ # should be the chapter title, into a filename, per
18
+ # http://stackoverflow.com/questions/1939333/how-to-make-a-ruby-string-safe-for-a-filesystem
19
+ # Also, since we might have special chars and foreign language isseus, we transliterate
20
+ # the string to strip out accents and such, per:
21
+ # http://stackoverflow.com/questions/225471/how-do-i-replace-accented-latin-characters-in-ruby
22
+ # (I'm not sure what it will do with CJKV languages.) Finally, it ensures that the
23
+ # string is less than 50 chars long, but breaks it on a word boundry so that it
24
+ # looks "nice"
25
+ #*************************************************************************************
26
+ def string_to_filename(s)
27
+ I18n.enforce_available_locales = false
28
+ out = ActiveSupport::Inflector.transliterate(s).downcase
29
+ out.gsub!(/^.*(\\|\/)/,'')
30
+ out.gsub!(/[^0-9A-Za-z]/,"_")
31
+ # truncate the name at 50 chars, but do it "nicely"
32
+ candidate = out[0,50].split("_") - [""]
33
+ return candidate.join("_")
34
+ end
35
+
36
+
37
+ #*************************************************************************************
38
+ # This function processes the raw HTML sections using nokogiri. It looks for
39
+ # headers or code; everything else is treated as HTML. This can be passed to
40
+ # ipynb directly since markdown is a superset of HTML. Although I'd originally
41
+ # planned to convert HTML to markdown, this proved infeasible with all the many
42
+ # edge cases in conversion, such has mathml
43
+ #*************************************************************************************
44
+ def process_section(n, level, out)
45
+ n.children.each do |c|
46
+ case c.name
47
+ when "section"
48
+ process_section(c, level+1, out) # since a section is just a container, we need to recurse to get the content
49
+ when "h1", "h2", "h3", "h4", "h4", "h5", "h6"
50
+ out << {
51
+ "cell_type" => "heading",
52
+ "level" => level,
53
+ "metadata" => {},
54
+ "source" => c.text
55
+ }
56
+ when "pre","code"
57
+ out << {
58
+ "cell_type" => "code",
59
+ "collapsed" => false,
60
+ "input" => c.text,
61
+ "language" => c.attributes["data-code-language"] || "python",
62
+ "metadata" => {},
63
+ "outputs" => []
64
+ }
65
+ else
66
+ out << {
67
+ "cell_type" => "markdown",
68
+ "metadata" => {},
69
+ "source" => c.to_s
70
+ }
71
+ end
72
+ end
73
+ return out
74
+ end
75
+
76
+
77
+ #*************************************************************************************
78
+ # This function takes a file name with HTML content, parses it with nokogiri, does some
79
+ # post-processing on the image links, and then calls process_section to convert
80
+ # each element to the corrseponding ipynb cell type (markdown, header, or code)
81
+ #*************************************************************************************
82
+ def html_to_ipynb(fn)
83
+ #
84
+ # Open the file and parse it w/nokogiri
85
+ doc = Nokogiri::HTML(IO.read(fn), nil, 'utf-8')
86
+ #
87
+ # Pre-process the doc to fix image URLs so that images can be served by the notebook server
88
+ # You do this by prepending "files/" to the image's relative URL, per this question on stackoverflow:
89
+ # "inserting image into ipython notebook markdown"
90
+ doc.css("figure img").each do |img|
91
+ src = img.attributes["src"].value
92
+ img.attributes["src"].value = src.split("/").unshift("files").join("/") #prepends "files" to the src
93
+ end
94
+ #
95
+ # Grab the first h1 tag to use as part of the notebooks filename
96
+ #
97
+ chapter_title = doc.css("section h1").first.text
98
+ #
99
+ # combine the cells we just computed with the ipynb header information
100
+ #
101
+ notebook = {
102
+ "metadata" => {
103
+ "name" => chapter_title
104
+ },
105
+ "nbformat" => 3,
106
+ "nbformat_minor"=> 0,
107
+ "worksheets" => [
108
+ {
109
+ "cells" => self.process_section(doc.css("section").first,1, []),
110
+ "metadata" => {}
111
+ }
112
+ ]
113
+ }
114
+ return notebook
115
+ end
116
+
117
+
118
+ #*************************************************************************************
119
+ # Convert all chapter files in the directory into ipynb
120
+ #*************************************************************************************
121
+ def convert!
122
+ puts "Searching for ch*.html files from Atlas"
123
+ Dir["ch*.html"].each do |fn|
124
+ out = html_to_ipynb(fn)
125
+ # Compute the new filename, which is the original filename
126
+ # with the ".html" (last 5 chars) replaced with ".ipynb".
127
+ title_fn = self.string_to_filename(out['metadata']['name'])
128
+ ipynb_fn = "#{fn[0,fn.length-5]}_#{title_fn}.ipynb"
129
+ puts "... Converting #{fn} to #{ipynb_fn}"
130
+ # Create the file
131
+ f = File.open(ipynb_fn, 'w')
132
+ f.write JSON.pretty_generate(out)
133
+ f.close
134
+ end
135
+ puts "Done!"
136
+ end
137
+
138
+ end
139
+
140
+ end
metadata ADDED
@@ -0,0 +1,143 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: atlas2ipynb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Andrew Odewahn
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-03-31 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.5'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '1.6'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '1.6'
55
+ - !ruby/object:Gem::Dependency
56
+ name: json
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '1.8'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: '1.8'
69
+ - !ruby/object:Gem::Dependency
70
+ name: active_support
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ version: '3.0'
76
+ - - ! '>='
77
+ - !ruby/object:Gem::Version
78
+ version: 3.0.0
79
+ type: :runtime
80
+ prerelease: false
81
+ version_requirements: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - ~>
84
+ - !ruby/object:Gem::Version
85
+ version: '3.0'
86
+ - - ! '>='
87
+ - !ruby/object:Gem::Version
88
+ version: 3.0.0
89
+ - !ruby/object:Gem::Dependency
90
+ name: i18n
91
+ requirement: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ! '>='
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ type: :runtime
97
+ prerelease: false
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ! '>='
101
+ - !ruby/object:Gem::Version
102
+ version: '0'
103
+ description: Converts all ch*.html files from Atlas into iPythyon Notebooks.
104
+ email:
105
+ - odewahn@oreilly.com
106
+ executables:
107
+ - atlas2ipynb
108
+ extensions: []
109
+ extra_rdoc_files: []
110
+ files:
111
+ - .gitignore
112
+ - Gemfile
113
+ - LICENSE.txt
114
+ - README.md
115
+ - Rakefile
116
+ - atlas2ipynb.gemspec
117
+ - bin/atlas2ipynb
118
+ - lib/atlas2ipynb.rb
119
+ homepage: https://github.com/odewahn/atlas2ipynb
120
+ licenses:
121
+ - MIT
122
+ metadata: {}
123
+ post_install_message:
124
+ rdoc_options: []
125
+ require_paths:
126
+ - lib
127
+ required_ruby_version: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ! '>='
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ required_rubygems_version: !ruby/object:Gem::Requirement
133
+ requirements:
134
+ - - ! '>='
135
+ - !ruby/object:Gem::Version
136
+ version: '0'
137
+ requirements: []
138
+ rubyforge_project:
139
+ rubygems_version: 2.2.2
140
+ signing_key:
141
+ specification_version: 4
142
+ summary: Converts Atlas HTML build into iPythyon Notebooks.
143
+ test_files: []