openwebslides-converter 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +54 -0
- data/.rdoc_options +16 -0
- data/.rspec +1 -0
- data/.rubocop.yml +16 -0
- data/.travis.yml +6 -0
- data/Gemfile +5 -0
- data/LICENSE.md +20 -0
- data/README.md +23 -0
- data/Rakefile +24 -0
- data/lib/openwebslides/converter.rb +8 -0
- data/lib/openwebslides/converter/content.rb +13 -0
- data/lib/openwebslides/converter/content/content_item.rb +12 -0
- data/lib/openwebslides/converter/content/heading.rb +13 -0
- data/lib/openwebslides/converter/content/paragraph.rb +13 -0
- data/lib/openwebslides/converter/content/properties/container.rb +21 -0
- data/lib/openwebslides/converter/content/properties/identifier.rb +41 -0
- data/lib/openwebslides/converter/content/properties/metadata.rb +24 -0
- data/lib/openwebslides/converter/content/properties/subable.rb +21 -0
- data/lib/openwebslides/converter/content/properties/text.rb +21 -0
- data/lib/openwebslides/converter/content/properties/type.rb +21 -0
- data/lib/openwebslides/converter/content/root.rb +11 -0
- data/lib/openwebslides/converter/helpers/sanitization.rb +21 -0
- data/lib/openwebslides/converter/pressbooks.rb +181 -0
- data/lib/openwebslides/converter/result.rb +16 -0
- data/lib/openwebslides/converter/version.rb +7 -0
- data/openwebslides-converter.gemspec +43 -0
- data/spec/content/content_item_spec.rb +25 -0
- data/spec/content/heading_spec.rb +27 -0
- data/spec/content/paragraph_spec.rb +27 -0
- data/spec/content/properties/container_spec.rb +48 -0
- data/spec/content/properties/identifier_spec.rb +51 -0
- data/spec/content/properties/metadata_spec.rb +48 -0
- data/spec/content/properties/subable_spec.rb +48 -0
- data/spec/content/properties/text_spec.rb +48 -0
- data/spec/content/properties/type_spec.rb +48 -0
- data/spec/content/root_spec.rb +26 -0
- data/spec/converter_spec.rb +10 -0
- data/spec/spec_helper.rb +11 -0
- metadata +221 -0
@@ -0,0 +1,181 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'reverse_markdown'
|
5
|
+
|
6
|
+
module OpenWebslides
  module Converter
    ##
    # Converts an XHTML document into Open Webslides content items.
    #
    # The markup it looks for is: `pb-*` meta tags for metadata,
    # `.front-matter-*` / `.back-matter-*` elements for matter, and
    # `div.part` / `div.chapter` elements for the body.
    #
    class Pressbooks
      include Helpers::Sanitization

      attr_accessor :html,
                    :result

      ##
      # Convert an XHTML document to Open Webslides
      #
      # The root item's children are, in order: the front matter heading,
      # every part heading, and the back matter heading. The root itself is
      # appended to the result last.
      #
      # @param [String] html XHTML document string
      # @return [Result] Result object containing all data
      #
      def from_xhtml(html)
        @html = Nokogiri::HTML html
        @result = Result.new

        root = Content::Root.new

        parse_metadata

        root.child_item_ids << parse_matter('front').id
        root.child_item_ids.concat parse_parts.map(&:id)
        root.child_item_ids << parse_matter('back').id

        result.content_items << root

        result
      end

      protected

      ##
      # Find and parse metadata
      #
      # Copies the `content` attribute of the `pb-title`, `pb-authors` and
      # `pb-book-license` meta tags onto the result.
      #
      # NOTE(review): a missing meta tag makes `html.at` return nil and the
      # subsequent `['content']` raise NoMethodError.
      #
      def parse_metadata
        result.title = html.at('meta[name="pb-title"]')['content']
        result.author = html.at('meta[name="pb-authors"]')['content']
        result.license = html.at('meta[name="pb-book-license"]')['content']
      end

      ##
      # Find and parse matter
      #
      # Builds a heading from `.<position>-matter-title`, attaches every
      # non-empty paragraph found under `.<position>-matter-ugc` to it and
      # appends the heading to the result.
      #
      # @param [String] position 'front' or 'back'
      # @return [Content::Heading] the matter heading
      #
      def parse_matter(position)
        heading = Content::Heading.new

        # Set title
        heading.text = html.at(".#{position}-matter-title").content

        # Parse matter paragraphs (empty ones are already filtered out)
        paragraphs = parse_paragraphs html.at(".#{position}-matter-ugc")

        # Add paragraphs to the matter heading
        heading.sub_item_ids = paragraphs.map(&:id)

        # Add matter heading to result
        result.content_items << heading

        heading
      end

      ##
      # Find and parse parts
      #
      # Walks `div.part` and `div.chapter` elements in document order. Every
      # part becomes a heading; every chapter is attached to the most
      # recently seen part.
      #
      # @return [Array<Content::Heading>] the part headings
      #
      def parse_parts
        parts = []

        html.search('div.part, div.chapter').each do |div|
          if div.classes.include? 'part'
            # Create new part heading
            parts << Content::Heading.new

            # Set title
            parts.last.text = div.at('.part-title').content

            # Part intro (paragraphs under part header)
            intro = parse_paragraphs div

            # Add intro to part
            parts.last.sub_item_ids = intro.map(&:id)
          elsif div.classes.include? 'chapter'
            chapter = parse_chapter div

            # Add chapter heading to the part
            # NOTE(review): a chapter that precedes every part leaves
            # `parts.last` nil here and raises NoMethodError.
            parts.last.sub_item_ids << chapter.id
          end
        end

        # Add parts to result
        result.content_items.concat parts

        parts
      end

      ##
      # Parse chapter XHTML object into content item
      #
      # Direct children of `.chapter-ugc` are handled as follows:
      # - `p`: added as a paragraph under the current heading
      # - `h4`: becomes a new heading under the chapter and the new current
      #   heading
      # - `ul` / `ol`: every `li` is added as a paragraph under the current
      #   heading
      # Any other node is ignored.
      #
      # @param html chapter node
      # @return [Content::Heading] the chapter heading
      #
      def parse_chapter(html)
        chapter = Content::Heading.new

        # Set title
        chapter.text = html.at('.chapter-title').content

        # Add chapter heading to result
        result.content_items << chapter

        # Start with chapter heading as current heading
        heading = chapter

        # Parse chapter content
        html.at('.chapter-ugc').children.each do |child|
          case child.name
          when 'p'
            paragraph = parse_paragraph child

            # Empty paragraphs come back as nil and are skipped
            heading.sub_item_ids << paragraph.id if paragraph
          when 'h4'
            subheading = Content::Heading.new
            subheading.text = child.content

            # Add heading to result and to the chapter
            result.content_items << subheading
            chapter.sub_item_ids << subheading.id

            # Replace current heading
            heading = subheading
          when 'ul', 'ol'
            # Extract list items; drop the nils produced by empty items so
            # that `map(&:id)` does not blow up on them
            items = child.search('li').map { |li| parse_paragraph li }.compact

            # Add paragraphs to heading
            heading.sub_item_ids.concat items.map(&:id)
          end
        end

        chapter
      end

      ##
      # Parse XHTML object into paragraph content items
      #
      # @param html node whose descendant `p` elements are converted
      # @return [Array<Content::Paragraph>] non-empty paragraphs only
      #
      def parse_paragraphs(html)
        # parse_paragraph returns nil for empty paragraphs; callers map the
        # result with `&:id`, so the nils have to be removed here
        html.search('p').map { |p| parse_paragraph p }.compact
      end

      ##
      # Parse paragraph XHTML object into content item
      #
      # The node is serialized to XHTML, converted with ReverseMarkdown and
      # passed through `sanitize`. Non-empty paragraphs are appended to the
      # result.
      #
      # @param html paragraph (or list item) node
      # @return [Content::Paragraph, nil] nil when the resulting text is empty
      #
      def parse_paragraph(html)
        paragraph = Content::Paragraph.new

        # Extract and sanitize paragraph contents
        paragraph.text = sanitize ReverseMarkdown.convert html.to_xhtml

        return nil if paragraph.text.empty?

        # Add paragraph to result
        result.content_items << paragraph

        paragraph
      end
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
lib = File.expand_path('lib', __dir__)
|
4
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
|
+
require 'openwebslides/converter/version'
|
6
|
+
|
7
|
+
# Gem specification for openwebslides-converter.
Gem::Specification.new do |gem|
  gem.name = 'openwebslides-converter'
  gem.version = OpenWebslides::Converter::VERSION
  gem.summary = 'Open Webslides Converter'
  gem.description = 'Converts arbitrary content data formats to Open Webslides data format'
  gem.license = 'MIT'
  gem.authors = ['Florian Dejonckheere']
  gem.email = 'florian@floriandejonckheere.be'
  gem.homepage = 'https://rubygems.org/gems/openwebslides-converter'

  # Split on "\n" explicitly. $INPUT_RECORD_SEPARATOR is only defined once
  # the 'English' library has been required; unless something else loaded it,
  # the global is nil and String#split falls back to splitting on any
  # whitespace, which breaks file names containing spaces.
  files = `git ls-files`.split("\n")

  # Add the tracked files of every (nested) submodule, with paths relative
  # to the repository root.
  `git submodule --quiet foreach --recursive pwd`.split("\n").each do |submodule|
    submodule = submodule.sub("#{Dir.pwd}/", '')

    Dir.chdir(submodule) do
      `git ls-files`.split("\n").each do |subpath|
        files << File.join(submodule, subpath)
      end
    end
  end

  gem.files = files
  gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  gem.add_dependency 'nokogiri', '~> 1.8'
  gem.add_dependency 'reverse_markdown', '~> 1.1'

  gem.add_development_dependency 'bundler', '~> 1.10'
  gem.add_development_dependency 'coveralls', '~> 0.8'
  gem.add_development_dependency 'rake', '~> 10.0'
  gem.add_development_dependency 'rdoc', '~> 4.0'
  gem.add_development_dependency 'rspec', '~> 3.0'
  gem.add_development_dependency 'rubocop', '~> 0.58'
  gem.add_development_dependency 'rubygems-tasks', '~> 0.2'
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
RSpec.describe Content::ContentItem do
  # No configuration, stubs, test variables or explicit subject are declared;
  # the examples run against RSpec's implicit subject.

  describe 'included properties' do
    %i[id type].each do |property|
      it { is_expected.to respond_to property }
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
RSpec.describe Content::Heading do
  # No configuration, stubs, test variables or explicit subject are declared;
  # the examples run against RSpec's implicit subject.

  describe 'included properties' do
    %i[id type text sub_item_ids].each do |property|
      it { is_expected.to respond_to property }
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
RSpec.describe Content::Paragraph do
  # No configuration, stubs, test variables or explicit subject are declared;
  # the examples run against RSpec's implicit subject.

  describe 'included properties' do
    %i[id type text sub_item_ids].each do |property|
      it { is_expected.to respond_to property }
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
RSpec.describe Content::Properties::Container do
  # Subject: a bare object extended with the module under test.
  subject { object }

  # Identifier is extended first; per the original note it must sit at the
  # top of the #to_h chain.
  let(:object) do
    Object.new.tap do |obj|
      obj.extend Content::Properties::Identifier
      obj.extend described_class
    end
  end

  describe 'included properties' do
    %i[child_item_ids to_h].each do |property|
      it { is_expected.to respond_to property }
    end

    it 'sets a default value for the property' do
      expect(subject.child_item_ids).not_to be_nil
    end
  end

  describe 'methods' do
    describe '#to_h' do
      it 'returns a hash with the property' do
        expect(subject.to_h).to include :child_item_ids
      end
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
RSpec.describe Content::Properties::Identifier do
  # Subject: a bare object extended with the module under test.
  subject { object }

  # Object#extend returns the receiver, so the block yields the extended object.
  let(:object) { Object.new.extend(described_class) }

  describe 'included properties' do
    %i[id to_h].each do |property|
      it { is_expected.to respond_to property }
    end

    it 'sets a default value for the property' do
      expect(subject.id).not_to be_nil
    end
  end

  describe 'methods' do
    describe '#to_h' do
      it 'returns a hash with the property' do
        expect(subject.to_h).to include :id
      end
    end

    describe '#generate_id' do
      # generate_id is invoked through #send, once per expectation.
      it 'returns a 10-digit string' do
        expect(subject.send(:generate_id)).to be_a String
        expect(subject.send(:generate_id).length).to eq 10
      end
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
RSpec.describe Content::Properties::Metadata do
  # Subject: a bare object extended with the module under test.
  subject { object }

  # Identifier is extended first; per the original note it must sit at the
  # top of the #to_h chain.
  let(:object) do
    Object.new.tap do |obj|
      obj.extend Content::Properties::Identifier
      obj.extend described_class
    end
  end

  describe 'included properties' do
    %i[metadata to_h].each do |property|
      it { is_expected.to respond_to property }
    end

    it 'sets a default value for the property' do
      expect(subject.metadata).not_to be_nil
    end
  end

  describe 'methods' do
    describe '#to_h' do
      it 'returns a hash with the property' do
        expect(subject.to_h).to include :metadata
      end
    end
  end
end
|