atlas2ipynb 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YTg5MzhiYWI1NzVjODhiNzgyZjQ4OTY2NDc0NjA0ZjM5YTY5ZTJkNg==
5
+ data.tar.gz: !binary |-
6
+ Y2U5MTJhNmMwNTE3NTM0ZmRkZDlhMzhhMjg2NzMzOGRmZDA5NTJmMg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ N2VmMjBmYzA2OTNhZjhiNDdkMjliN2E5OTljMGU3NzMyNDUwNDJiNjMxMmRi
10
+ ZTEzNWNkNzk5MTFmY2I2YzY0NWQ4OTZhNDYyZTg1NjQ0ODlkNTRmY2ZhMDFi
11
+ ODg3ZTBjYjA1Nzg2NDE4Yjg1NDFmZWEzZWMyYWY3ODRiZDIzOWE=
12
+ data.tar.gz: !binary |-
13
+ ZjA2YTMwNjU4ODU4NGYzN2IxNTY5OTkxMzM5YjFiYmFhNWRhNjRiZWRmOGQ2
14
+ YzA1MzhhZTM2MDZhMTY1NDUzMTBlNmFmMmViMjQyZjRmMjQ4MDIwNWNjZjY2
15
+ NDBjNzE2MjkxZTdlOTIzZTMxNzBlNjY0YmY5ZjVhNTVlYTY1Yjg=
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in atlas2ipynb.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Andrew Odewahn
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Atlas2ipynb
2
+
3
+ Converts all `ch*.html` files from an Atlas HTML build into IPython Notebooks.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'atlas2ipynb'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install atlas2ipynb
18
+
19
+ ## Usage
20
+
21
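+ Run `atlas2ipynb` from the directory that contains the Atlas `ch*.html` files; one `.ipynb` notebook is written to that directory for each chapter file.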
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it ( https://github.com/odewahn/atlas2ipynb/fork )
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,28 @@
1
# coding: utf-8
#
# Gem specification for atlas2ipynb: declares the gem's identity, the files
# that are packaged, and its development and runtime dependencies.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

Gem::Specification.new do |spec|
  spec.name          = "atlas2ipynb"
  spec.version       = "0.0.1"
  spec.authors       = ["Andrew Odewahn"]
  spec.email         = ["odewahn@oreilly.com"]
  spec.summary       = %q{Converts Atlas HTML build into IPython Notebooks.}
  spec.description   = %q{Converts all ch*.html files from Atlas into IPython Notebooks.}
  spec.homepage      = "https://github.com/odewahn/atlas2ipynb"
  spec.license       = "MIT"

  # Package exactly the files tracked by git; executables and test files are
  # derived from that list by path prefix.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.5"
  spec.add_development_dependency "rake"

  spec.add_dependency "nokogiri", "~> 1.6"
  spec.add_dependency "json", "~> 1.8"
  # The library loaded with `require 'active_support/inflector'` is
  # distributed in the gem named "activesupport" (no underscore).
  # "~> 3.0" already implies ">= 3.0.0", so no second constraint is needed.
  spec.add_dependency "activesupport", "~> 3.0"
  spec.add_dependency "i18n"
end
data/bin/atlas2ipynb ADDED
@@ -0,0 +1,9 @@
1
#!/usr/bin/env ruby
# encoding: UTF-8

require 'rubygems'
require 'atlas2ipynb'

# Command-line entry point: build a converter and run it. The converter
# looks for ch*.html files relative to the current working directory.
Atlas2ipynb::Converter.new.convert!
@@ -0,0 +1,140 @@
1
+ require 'nokogiri'
2
+ require 'json'
3
+ require 'i18n'
4
+ require 'active_support/inflector'
5
+
6
+ # If running in irb, don't forget to do this:
7
+ # require 'bundler'
8
+ # Bundler.require
9
+ #
10
+ module Atlas2ipynb
11
+
12
+ class Converter
13
+
14
+ #*************************************************************************************
15
+ # ipynb uses the plain filename as the user's index page, so we need to make something
16
+ # a name that is "meaningful". To do this, I turn the <h1> from the first section, which
17
+ # should be the chapter title, into a filename, per
18
+ # http://stackoverflow.com/questions/1939333/how-to-make-a-ruby-string-safe-for-a-filesystem
19
+ # Also, since we might have special chars and foreign language issues, we transliterate
20
+ # the string to strip out accents and such, per:
21
+ # http://stackoverflow.com/questions/225471/how-do-i-replace-accented-latin-characters-in-ruby
22
+ # (I'm not sure what it will do with CJKV languages.) Finally, it ensures that the
23
+ # string is less than 50 chars long, but breaks it on a word boundary so that it
24
+ # looks "nice"
25
+ #*************************************************************************************
26
# Turns a chapter title into a short, filesystem-safe file name.
#
# The title is transliterated to ASCII, lower-cased, stripped of everything
# up to the last path separator on each line, and split into alphanumeric
# words that are re-joined with single underscores. The result is capped at
# +max_length+ characters. The cut is made between words so that no word is
# chopped in half; only a first word that is itself longer than the cap is
# hard-truncated.
#
# @param s [String, nil] the title to convert; nil is treated as ""
# @param max_length [Integer] maximum length of the returned name
# @return [String] a name consisting only of 0-9, a-z and "_"; may be empty
def string_to_filename(s, max_length = 50)
  # Turn off I18n's available-locale enforcement before transliterating.
  I18n.enforce_available_locales = false
  ascii = ActiveSupport::Inflector.transliterate(s.to_s).downcase
  # Drop any leading directory part (Unix or Windows separators).
  ascii = ascii.gsub(/^.*(\\|\/)/, '')
  # Every run of non-alphanumeric characters is a word separator.
  words = ascii.split(/[^0-9a-z]+/).reject(&:empty?)
  return "" if words.empty?

  kept = []
  used = 0
  words.each do |word|
    needed = word.length + (kept.empty? ? 0 : 1) # +1 for the joining "_"
    break if used + needed > max_length
    kept << word
    used += needed
  end

  # A single over-long first word cannot be broken nicely; cut it hard.
  kept.empty? ? words.first[0, max_length] : kept.join("_")
end
35
+
36
+
37
+ #*************************************************************************************
38
+ # This function processes the raw HTML sections using nokogiri. It looks for
39
+ # headers or code; everything else is treated as HTML. This can be passed to
40
+ # ipynb directly since markdown is a superset of HTML. Although I'd originally
41
+ # planned to convert HTML to markdown, this proved infeasible with all the many
42
+ # edge cases in conversion, such as MathML
43
+ #*************************************************************************************
44
# Recursively converts the children of an HTML node into notebook cells.
#
# Nested <section> elements are pure containers: they produce no cell of
# their own and their children are processed one heading level deeper.
# Heading tags become "heading" cells whose level is the section nesting
# depth passed in +level+ (not the digit in the tag name). <pre> and <code>
# become "code" cells. Every other node is serialized unchanged into a
# "markdown" cell. Whitespace-only text nodes are skipped so the notebook
# does not fill up with empty markdown cells.
#
# @param n [#children] node whose children are converted (a Nokogiri node in practice)
# @param level [Integer] heading level assigned to headings at this depth
# @param out [Array<Hash>] accumulator; cells are appended in document order
# @return [Array<Hash>] the same +out+ array that was passed in
def process_section(n, level, out)
  n.children.each do |c|
    case c.name
    when "section"
      # A section is just a container, so recurse to reach its content.
      process_section(c, level + 1, out)
    when "h1", "h2", "h3", "h4", "h5", "h6"
      out << {
        "cell_type" => "heading",
        "level"     => level,
        "metadata"  => {},
        "source"    => c.text
      }
    when "pre", "code"
      out << {
        "cell_type" => "code",
        "collapsed" => false,
        "input"     => c.text,
        # Node#[] yields the attribute's String value (or nil), so the cell
        # holds plain data rather than a parser attribute object.
        "language"  => c["data-code-language"] || "python",
        "metadata"  => {},
        "outputs"   => []
      }
    else
      # Text nodes made only of inter-element whitespace carry no content.
      next if c.name == "text" && c.text.strip.empty?
      out << {
        "cell_type" => "markdown",
        "metadata"  => {},
        "source"    => c.to_s
      }
    end
  end
  out
end
75
+
76
+
77
+ #*************************************************************************************
78
+ # This function takes a file name with HTML content, parses it with nokogiri, does some
79
+ # post-processing on the image links, and then calls process_section to convert
80
+ # each element to the corresponding ipynb cell type (markdown, heading, or code)
81
+ #*************************************************************************************
82
# Parses one Atlas chapter file and builds the notebook structure for it.
#
# Image sources inside <figure> elements are prefixed with "files/" so the
# notebook server can serve them. Images without a src, and images whose src
# is an absolute or protocol-relative URL (including data: URIs), are left
# untouched. The notebook name is the text of the first <h1> found inside a
# <section>; when there is none, the file's base name is used instead. Cells
# come from the first <section>; a document without one yields no cells.
#
# @param fn [String] path of the HTML file to convert
# @return [Hash] notebook in nbformat 3 layout, ready for JSON serialization
def html_to_ipynb(fn)
  # Open the file and parse it w/nokogiri
  doc = Nokogiri::HTML(IO.read(fn), nil, 'utf-8')

  # Pre-process the doc to fix image URLs so that images can be served by
  # the notebook server.
  doc.css("figure img").each do |img|
    src = img["src"]
    next if src.nil? || src.empty?
    # "http://...", "data:...", "//host/..." must not get a local prefix.
    next if src =~ %r{\A(?:[a-z][a-z0-9+.\-]*:|//)}i
    img["src"] = src.split("/").unshift("files").join("/")
  end

  # Grab the first h1 tag to use as the notebook's name.
  heading = doc.css("section h1").first
  chapter_title = heading ? heading.text : File.basename(fn, ".*")

  # Combine the cells with the ipynb header information.
  first_section = doc.css("section").first
  cells = first_section ? process_section(first_section, 1, []) : []

  {
    "metadata" => {
      "name" => chapter_title
    },
    "nbformat"       => 3,
    "nbformat_minor" => 0,
    "worksheets"     => [
      {
        "cells"    => cells,
        "metadata" => {}
      }
    ]
  }
end
116
+
117
+
118
+ #*************************************************************************************
119
+ # Convert all chapter files in the directory into ipynb
120
+ #*************************************************************************************
121
# Converts every ch*.html file in the current working directory into an
# .ipynb notebook written to that same directory.
#
# Each output file is named "<chapter file name without extension>_<title>.ipynb",
# where <title> is the notebook name passed through string_to_filename.
# Progress is reported on standard output.
#
# @return [nil]
def convert!
  puts "Searching for ch*.html files from Atlas"
  # Sorted so chapters are processed in a stable order on every platform.
  Dir["ch*.html"].sort.each do |fn|
    out = html_to_ipynb(fn)
    title_fn = string_to_filename(out['metadata']['name'])
    ipynb_fn = "#{File.basename(fn, File.extname(fn))}_#{title_fn}.ipynb"
    puts "... Converting #{fn} to #{ipynb_fn}"
    # The block form closes the file even if serialization or the write raises.
    File.open(ipynb_fn, 'w') { |f| f.write JSON.pretty_generate(out) }
  end
  puts "Done!"
end
137
+
138
+ end
139
+
140
+ end
metadata ADDED
@@ -0,0 +1,143 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: atlas2ipynb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Andrew Odewahn
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-03-31 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.5'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '1.6'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '1.6'
55
+ - !ruby/object:Gem::Dependency
56
+ name: json
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '1.8'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: '1.8'
69
+ - !ruby/object:Gem::Dependency
70
+ name: active_support
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ version: '3.0'
76
+ - - ! '>='
77
+ - !ruby/object:Gem::Version
78
+ version: 3.0.0
79
+ type: :runtime
80
+ prerelease: false
81
+ version_requirements: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - ~>
84
+ - !ruby/object:Gem::Version
85
+ version: '3.0'
86
+ - - ! '>='
87
+ - !ruby/object:Gem::Version
88
+ version: 3.0.0
89
+ - !ruby/object:Gem::Dependency
90
+ name: i18n
91
+ requirement: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ! '>='
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ type: :runtime
97
+ prerelease: false
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ! '>='
101
+ - !ruby/object:Gem::Version
102
+ version: '0'
103
+ description: Converts all ch*.html files from Atlas into iPythyon Notebooks.
104
+ email:
105
+ - odewahn@oreilly.com
106
+ executables:
107
+ - atlas2ipynb
108
+ extensions: []
109
+ extra_rdoc_files: []
110
+ files:
111
+ - .gitignore
112
+ - Gemfile
113
+ - LICENSE.txt
114
+ - README.md
115
+ - Rakefile
116
+ - atlas2ipynb.gemspec
117
+ - bin/atlas2ipynb
118
+ - lib/atlas2ipynb.rb
119
+ homepage: https://github.com/odewahn/atlas2ipynb
120
+ licenses:
121
+ - MIT
122
+ metadata: {}
123
+ post_install_message:
124
+ rdoc_options: []
125
+ require_paths:
126
+ - lib
127
+ required_ruby_version: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ! '>='
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ required_rubygems_version: !ruby/object:Gem::Requirement
133
+ requirements:
134
+ - - ! '>='
135
+ - !ruby/object:Gem::Version
136
+ version: '0'
137
+ requirements: []
138
+ rubyforge_project:
139
+ rubygems_version: 2.2.2
140
+ signing_key:
141
+ specification_version: 4
142
+ summary: Converts Atlas HTML build into iPythyon Notebooks.
143
+ test_files: []