openwebslides-converter 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +54 -0
- data/.rdoc_options +16 -0
- data/.rspec +1 -0
- data/.rubocop.yml +16 -0
- data/.travis.yml +6 -0
- data/Gemfile +5 -0
- data/LICENSE.md +20 -0
- data/README.md +23 -0
- data/Rakefile +24 -0
- data/lib/openwebslides/converter.rb +8 -0
- data/lib/openwebslides/converter/content.rb +13 -0
- data/lib/openwebslides/converter/content/content_item.rb +12 -0
- data/lib/openwebslides/converter/content/heading.rb +13 -0
- data/lib/openwebslides/converter/content/paragraph.rb +13 -0
- data/lib/openwebslides/converter/content/properties/container.rb +21 -0
- data/lib/openwebslides/converter/content/properties/identifier.rb +41 -0
- data/lib/openwebslides/converter/content/properties/metadata.rb +24 -0
- data/lib/openwebslides/converter/content/properties/subable.rb +21 -0
- data/lib/openwebslides/converter/content/properties/text.rb +21 -0
- data/lib/openwebslides/converter/content/properties/type.rb +21 -0
- data/lib/openwebslides/converter/content/root.rb +11 -0
- data/lib/openwebslides/converter/helpers/sanitization.rb +21 -0
- data/lib/openwebslides/converter/pressbooks.rb +181 -0
- data/lib/openwebslides/converter/result.rb +16 -0
- data/lib/openwebslides/converter/version.rb +7 -0
- data/openwebslides-converter.gemspec +43 -0
- data/spec/content/content_item_spec.rb +25 -0
- data/spec/content/heading_spec.rb +27 -0
- data/spec/content/paragraph_spec.rb +27 -0
- data/spec/content/properties/container_spec.rb +48 -0
- data/spec/content/properties/identifier_spec.rb +51 -0
- data/spec/content/properties/metadata_spec.rb +48 -0
- data/spec/content/properties/subable_spec.rb +48 -0
- data/spec/content/properties/text_spec.rb +48 -0
- data/spec/content/properties/type_spec.rb +48 -0
- data/spec/content/root_spec.rb +26 -0
- data/spec/converter_spec.rb +10 -0
- data/spec/spec_helper.rb +11 -0
- metadata +221 -0
@@ -0,0 +1,181 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'reverse_markdown'
|
5
|
+
|
6
|
+
module OpenWebslides
  module Converter
    ##
    # Converts a Pressbooks XHTML document into Open Webslides content items
    #
    # Every content item that is created is appended to `result.content_items`;
    # the hierarchy is expressed through the `sub_item_ids`/`child_item_ids`
    # of the parent items.
    #
    class Pressbooks
      include Helpers::Sanitization

      attr_accessor :html,
                    :result

      ##
      # Convert an XHTML document to Open Webslides
      #
      # @param [String] html XHTML document string
      # @return [Result] Result object containing all data
      #
      def from_xhtml(html)
        @html = Nokogiri::HTML html
        @result = Result.new

        root = Content::Root.new

        parse_metadata

        # Sections are parsed in document order; a front or back matter that
        # is absent from the document yields nil and is left out of the root
        sections = [parse_matter('front'), *parse_parts, parse_matter('back')].compact
        root.child_item_ids.concat sections.map(&:id)

        result.content_items << root

        result
      end

      protected

      ##
      # Find and parse metadata
      #
      # A field whose meta tag is missing from the document is set to nil
      #
      def parse_metadata
        result.title = meta_content 'pb-title'
        result.author = meta_content 'pb-authors'
        result.license = meta_content 'pb-book-license'
      end

      ##
      # Read the `content` attribute of a named meta tag
      #
      # @param [String] name value of the meta tag's `name` attribute
      # @return [String, nil] attribute value, or nil when the tag is missing
      #
      def meta_content(name)
        tag = html.at(%(meta[name="#{name}"]))

        tag && tag['content']
      end

      ##
      # Find and parse matter
      #
      # @param [String] position 'front' or 'back'
      # @return [Content::Heading, nil] matter heading, or nil when the
      #   document contains neither a title nor a body for this matter
      #
      def parse_matter(position)
        title = html.at(".#{position}-matter-title")
        body = html.at(".#{position}-matter-ugc")

        # Nothing to convert: the document has no such matter section
        return nil unless title || body

        # Create new heading
        heading = Content::Heading.new

        # Set title (left untouched when the section has no title element)
        heading.text = title.content if title

        # Add the matter paragraphs to the matter heading
        heading.sub_item_ids = parse_paragraphs(body).map(&:id)

        # Add matter heading to result
        result.content_items << heading

        heading
      end

      ##
      # Find and parse parts
      #
      # Chapters are attached to the most recent part that precedes them in
      # the document. A chapter that is not preceded by any part is returned
      # as a top-level heading instead.
      #
      # @return [Array<Content::Heading>] top-level headings in document order
      #
      def parse_parts
        parts = []
        top_level = []

        html.search('div.part, div.chapter').each do |div|
          if div.classes.include? 'part'
            # Create new part heading
            part = Content::Heading.new

            # Set title (left untouched when the part has no title element)
            title = div.at('.part-title')
            part.text = title.content if title

            # Part intro (paragraphs under part header)
            part.sub_item_ids = parse_paragraphs(div).map(&:id)

            parts << part
            top_level << part
          elsif div.classes.include? 'chapter'
            chapter = parse_chapter div

            if parts.empty?
              # No part to attach to: keep the chapter reachable from the root
              top_level << chapter
            else
              # Add chapter heading to the part
              parts.last.sub_item_ids << chapter.id
            end
          end
        end

        # Add parts to result (chapters were already added by #parse_chapter)
        result.content_items.concat parts

        top_level
      end

      ##
      # Parse chapter XHTML object into content item
      #
      # @param [Object] html chapter `div` element (shadows the `html` reader)
      # @return [Content::Heading] chapter heading
      #
      def parse_chapter(html)
        # Create new chapter
        chapter = Content::Heading.new

        # Set title (left untouched when the chapter has no title element)
        title = html.at('.chapter-title')
        chapter.text = title.content if title

        # Add chapter heading to result
        result.content_items << chapter

        # Chapter without content container: only the heading is produced
        body = html.at('.chapter-ugc')
        return chapter unless body

        # Start with chapter heading as current heading
        heading = chapter

        # Parse chapter content
        body.children.each do |child|
          case child.name
          when 'p'
            paragraph = parse_paragraph child

            # Add paragraph to current heading (empty paragraphs are dropped)
            heading.sub_item_ids << paragraph.id if paragraph
          when 'h4'
            # Create new heading and make it the current heading
            heading = Content::Heading.new
            heading.text = child.content

            # Add heading to result
            result.content_items << heading

            # Subheadings are always attached to the chapter itself
            chapter.sub_item_ids << heading.id
          when 'ul', 'ol'
            # Extract list items, dropping the ones without any text
            items = child.search('li').map { |item| parse_paragraph item }.compact

            # Add list items to current heading
            heading.sub_item_ids.concat items.map(&:id)
          end
        end

        chapter
      end

      ##
      # Parse XHTML object into paragraph content items
      #
      # @param [Object, nil] html element to search for `p` elements
      # @return [Array<Content::Paragraph>] non-empty paragraphs; empty array
      #   when `html` is nil
      #
      def parse_paragraphs(html)
        return [] unless html

        # #parse_paragraph returns nil for empty paragraphs: filter those out
        html.search('p').map { |node| parse_paragraph node }.compact
      end

      ##
      # Parse paragraph XHTML object into content item
      #
      # @param [Object] html element whose XHTML is converted to Markdown
      # @return [Content::Paragraph, nil] paragraph, or nil when the sanitized
      #   text is empty (nothing is added to the result in that case)
      #
      def parse_paragraph(html)
        # Create new paragraph
        paragraph = Content::Paragraph.new

        # Extract and sanitize paragraph contents
        paragraph.text = sanitize ReverseMarkdown.convert html.to_xhtml

        return nil if paragraph.text.empty?

        # Add paragraph to result
        result.content_items << paragraph

        paragraph
      end
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
lib = File.expand_path('lib', __dir__)
|
4
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
|
+
require 'openwebslides/converter/version'
|
6
|
+
|
7
|
+
# Gem specification for openwebslides-converter. The packaged file list is
# built from `git ls-files`, so the gem must be built from a git checkout.
Gem::Specification.new do |gem|
  gem.name = 'openwebslides-converter'
  gem.version = OpenWebslides::Converter::VERSION
  gem.summary = 'Open Webslides Converter'
  gem.description = 'Converts arbitrary content data formats to Open Webslides data format'
  gem.license = 'MIT'
  gem.authors = ['Florian Dejonckheere']
  gem.email = 'florian@floriandejonckheere.be'
  gem.homepage = 'https://rubygems.org/gems/openwebslides-converter'

  # Files tracked in the main repository, one path per line of output.
  # Split on an explicit newline: $INPUT_RECORD_SEPARATOR is only defined
  # after `require 'English'`, and splitting on nil splits on any whitespace.
  files = `git ls-files`.split("\n")

  # Files tracked in every (nested) submodule, relative to the gem root
  `git submodule --quiet foreach --recursive pwd`.split("\n").each do |submodule|
    relative = submodule.sub("#{Dir.pwd}/", '')

    Dir.chdir(relative) do
      `git ls-files`.split("\n").each do |subpath|
        files << File.join(relative, subpath)
      end
    end
  end

  gem.files = files

  gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  gem.add_dependency 'nokogiri', '~> 1.8'
  gem.add_dependency 'reverse_markdown', '~> 1.1'

  gem.add_development_dependency 'bundler', '~> 1.10'
  gem.add_development_dependency 'coveralls', '~> 0.8'
  gem.add_development_dependency 'rake', '~> 10.0'
  gem.add_development_dependency 'rdoc', '~> 4.0'
  gem.add_development_dependency 'rspec', '~> 3.0'
  gem.add_development_dependency 'rubocop', '~> 0.58'
  gem.add_development_dependency 'rubygems-tasks', '~> 0.2'
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
RSpec.describe Content::ContentItem do
  ##
  # Tests
  #
  # The implicit subject must respond to each of the listed property readers
  #
  describe 'included properties' do
    %i[id type].each do |property|
      it { is_expected.to respond_to(property) }
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
RSpec.describe Content::Heading do
  ##
  # Tests
  #
  # The implicit subject must respond to each of the listed property readers
  #
  describe 'included properties' do
    %i[id type text sub_item_ids].each do |property|
      it { is_expected.to respond_to(property) }
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
RSpec.describe Content::Paragraph do
  ##
  # Tests
  #
  # The implicit subject must respond to each of the listed property readers
  #
  describe 'included properties' do
    %i[id type text sub_item_ids].each do |property|
      it { is_expected.to respond_to(property) }
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
RSpec.describe Content::Properties::Container do
  ##
  # Test variables
  #
  # A plain object extended with the property module under test
  #
  let(:object) do
    Object.new.tap do |host|
      # Always include Identifier as top of the #to_h chain
      host.extend(Content::Properties::Identifier)
      host.extend(described_class)
    end
  end

  ##
  # Subject
  #
  subject { object }

  ##
  # Tests
  #
  describe 'included properties' do
    it { is_expected.to respond_to(:child_item_ids) }
    it { is_expected.to respond_to(:to_h) }

    it 'sets a default value for the property' do
      expect(subject.child_item_ids).not_to be_nil
    end
  end

  describe 'methods' do
    describe '#to_h' do
      it 'returns a hash with the property' do
        expect(subject.to_h).to include(:child_item_ids)
      end
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
RSpec.describe Content::Properties::Identifier do
  ##
  # Test variables
  #
  # A plain object extended with the property module under test
  # (Object#extend returns the extended object itself)
  #
  let(:object) { Object.new.extend(described_class) }

  ##
  # Subject
  #
  subject { object }

  ##
  # Tests
  #
  describe 'included properties' do
    it { is_expected.to respond_to(:id) }
    it { is_expected.to respond_to(:to_h) }

    it 'sets a default value for the property' do
      expect(subject.id).not_to be_nil
    end
  end

  describe 'methods' do
    describe '#to_h' do
      it 'returns a hash with the property' do
        expect(subject.to_h).to include(:id)
      end
    end

    describe '#generate_id' do
      it 'returns a 10-digit string' do
        # #generate_id is invoked through #send, once per expectation
        expect(subject.send(:generate_id)).to be_a(String)
        expect(subject.send(:generate_id).length).to eq(10)
      end
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
RSpec.describe Content::Properties::Metadata do
  ##
  # Test variables
  #
  # A plain object extended with the property module under test
  #
  let(:object) do
    Object.new.tap do |host|
      # Always include Identifier as top of the #to_h chain
      host.extend(Content::Properties::Identifier)
      host.extend(described_class)
    end
  end

  ##
  # Subject
  #
  subject { object }

  ##
  # Tests
  #
  describe 'included properties' do
    it { is_expected.to respond_to(:metadata) }
    it { is_expected.to respond_to(:to_h) }

    it 'sets a default value for the property' do
      expect(subject.metadata).not_to be_nil
    end
  end

  describe 'methods' do
    describe '#to_h' do
      it 'returns a hash with the property' do
        expect(subject.to_h).to include(:metadata)
      end
    end
  end
end
|