openwebslides-converter 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +54 -0
  3. data/.rdoc_options +16 -0
  4. data/.rspec +1 -0
  5. data/.rubocop.yml +16 -0
  6. data/.travis.yml +6 -0
  7. data/Gemfile +5 -0
  8. data/LICENSE.md +20 -0
  9. data/README.md +23 -0
  10. data/Rakefile +24 -0
  11. data/lib/openwebslides/converter.rb +8 -0
  12. data/lib/openwebslides/converter/content.rb +13 -0
  13. data/lib/openwebslides/converter/content/content_item.rb +12 -0
  14. data/lib/openwebslides/converter/content/heading.rb +13 -0
  15. data/lib/openwebslides/converter/content/paragraph.rb +13 -0
  16. data/lib/openwebslides/converter/content/properties/container.rb +21 -0
  17. data/lib/openwebslides/converter/content/properties/identifier.rb +41 -0
  18. data/lib/openwebslides/converter/content/properties/metadata.rb +24 -0
  19. data/lib/openwebslides/converter/content/properties/subable.rb +21 -0
  20. data/lib/openwebslides/converter/content/properties/text.rb +21 -0
  21. data/lib/openwebslides/converter/content/properties/type.rb +21 -0
  22. data/lib/openwebslides/converter/content/root.rb +11 -0
  23. data/lib/openwebslides/converter/helpers/sanitization.rb +21 -0
  24. data/lib/openwebslides/converter/pressbooks.rb +181 -0
  25. data/lib/openwebslides/converter/result.rb +16 -0
  26. data/lib/openwebslides/converter/version.rb +7 -0
  27. data/openwebslides-converter.gemspec +43 -0
  28. data/spec/content/content_item_spec.rb +25 -0
  29. data/spec/content/heading_spec.rb +27 -0
  30. data/spec/content/paragraph_spec.rb +27 -0
  31. data/spec/content/properties/container_spec.rb +48 -0
  32. data/spec/content/properties/identifier_spec.rb +51 -0
  33. data/spec/content/properties/metadata_spec.rb +48 -0
  34. data/spec/content/properties/subable_spec.rb +48 -0
  35. data/spec/content/properties/text_spec.rb +48 -0
  36. data/spec/content/properties/type_spec.rb +48 -0
  37. data/spec/content/root_spec.rb +26 -0
  38. data/spec/converter_spec.rb +10 -0
  39. data/spec/spec_helper.rb +11 -0
  40. metadata +221 -0
@@ -0,0 +1,181 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require 'reverse_markdown'
5
+
6
module OpenWebslides
  module Converter
    ##
    # Converter for Pressbooks XHTML documents
    #
    # Walks the parsed document and collects headings and paragraphs as
    # content items in a Result object.
    #
    class Pressbooks
      include Helpers::Sanitization

      attr_accessor :html,
                    :result

      ##
      # Convert an XHTML document to Open Webslides
      #
      # @param [String] html XHTML document string
      # @return [Result] Result object containing all data
      #
      def from_xhtml(html)
        @html = Nokogiri::HTML html
        @result = Result.new

        root = Content::Root.new

        parse_metadata

        # Elements are evaluated left to right, so content items are still
        # collected in document order. Absent front/back matter yields nil
        # and is dropped instead of raising.
        top_level = [parse_matter('front'), *parse_parts, parse_matter('back')].compact
        root.child_item_ids.concat top_level.map(&:id)

        result.content_items << root

        result
      end

      protected

      ##
      # Find and parse metadata
      #
      # A meta tag that is not present leaves the corresponding result
      # attribute set to nil.
      #
      def parse_metadata
        result.title = meta_content 'pb-title'
        result.author = meta_content 'pb-authors'
        result.license = meta_content 'pb-book-license'
      end

      ##
      # Read the content attribute of a named meta tag
      #
      # @param [String] name Value of the meta tag's name attribute
      # @return [String, nil] Attribute value, or nil when the tag is absent
      #
      def meta_content(name)
        tag = html.at(%(meta[name="#{name}"]))
        tag && tag['content']
      end

      ##
      # Find and parse matter
      #
      # @param [String] position 'front' or 'back'
      # @return [Content::Heading, nil] Matter heading, or nil when the
      #   document has no title element for this matter
      #
      def parse_matter(position)
        title = html.at(".#{position}-matter-title")
        return nil unless title

        # Create new heading and set its title
        heading = Content::Heading.new
        heading.text = title.content

        # Parse matter paragraphs (none when the body element is absent)
        body = html.at(".#{position}-matter-ugc")
        paragraphs = body ? parse_paragraphs(body) : []

        # Add paragraphs to the matter heading
        heading.sub_item_ids = paragraphs.map(&:id)

        # Add matter heading to result
        result.content_items << heading

        heading
      end

      ##
      # Find and parse parts
      #
      # @return [Array<Content::Heading>] Top-level headings in document
      #   order: every part, plus any chapter that appears before the first
      #   part (such a chapter has no part to be attached to)
      #
      def parse_parts
        parts = []
        top_level = []
        current_part = nil

        html.search('div.part, div.chapter').each do |div|
          if div.classes.include? 'part'
            # Create new part heading
            current_part = Content::Heading.new

            # Set title
            current_part.text = div.at('.part-title').content

            # Part intro (paragraphs under part header)
            intro = parse_paragraphs div

            # Add intro to part
            current_part.sub_item_ids = intro.map(&:id)

            parts << current_part
            top_level << current_part
          elsif div.classes.include? 'chapter'
            chapter = parse_chapter div

            if current_part
              # Add chapter heading to the part
              current_part.sub_item_ids << chapter.id
            else
              # No part seen yet: keep the chapter at the top level.
              # parse_chapter already added it to the result.
              top_level << chapter
            end
          end
        end

        # Add parts to result
        result.content_items.concat parts

        top_level
      end

      ##
      # Parse chapter XHTML object into content item
      #
      # Paragraphs and list items are attached to the most recent heading
      # (the chapter itself, or the last h4 encountered); h4 headings are
      # attached to the chapter.
      #
      # @return [Content::Heading] Chapter heading
      #
      def parse_chapter(html)
        # Create new chapter
        chapter = Content::Heading.new

        # Set title
        chapter.text = html.at('.chapter-title').content

        # Add chapter heading to result
        result.content_items << chapter

        # A chapter without a content container has nothing more to parse
        body = html.at('.chapter-ugc')
        return chapter unless body

        # Start with chapter heading as current heading
        heading = chapter

        # Parse chapter content
        body.children.each do |child|
          case child.name
          when 'p'
            paragraph = parse_paragraph child

            # Add paragraph to current heading (empty paragraphs are nil)
            heading.sub_item_ids << paragraph.id if paragraph
          when 'h4'
            # Create new heading and set its title
            h = Content::Heading.new
            h.text = child.content

            # Add heading to result and to the chapter
            result.content_items << h
            chapter.sub_item_ids << h.id

            # Replace current heading
            heading = h
          when 'ul', 'ol'
            # Extract list items, dropping the ones that turned out empty
            paragraphs = child.search('li').map { |l| parse_paragraph l }.compact

            # Add paragraphs to heading
            heading.sub_item_ids.concat paragraphs.map(&:id)
          end
        end

        chapter
      end

      ##
      # Parse XHTML object into paragraph content items
      #
      # @return [Array<Content::Paragraph>] Non-empty paragraphs only
      #
      def parse_paragraphs(html)
        # parse_paragraph returns nil for empty paragraphs; drop those so
        # callers can safely map over the result
        html.search('p').map { |node| parse_paragraph node }.compact
      end

      ##
      # Parse paragraph XHTML object into content item
      #
      # @return [Content::Paragraph, nil] Paragraph, or nil when its
      #   sanitized text is empty (nothing is added to the result then)
      #
      def parse_paragraph(html)
        # Create new paragraph
        paragraph = Content::Paragraph.new

        # Extract and sanitize paragraph contents
        paragraph.text = sanitize ReverseMarkdown.convert html.to_xhtml

        return nil if paragraph.text.empty?

        # Add paragraph to result
        result.content_items << paragraph

        paragraph
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module OpenWebslides
  module Converter
    ##
    # Value holder with a title, an author, a license and a list of
    # content items
    #
    class Result
      attr_accessor :title, :author, :license, :content_items

      ##
      # Start out with an empty list of content items; the other
      # attributes stay unset
      #
      def initialize
        self.content_items = []
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
module OpenWebslides
  module Converter
    ##
    # Version string of the converter
    #
    VERSION = '0.1.0'.freeze
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
# Make lib/ loadable so the version constant can be required below
lib = File.expand_path('lib', __dir__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'openwebslides/converter/version'

Gem::Specification.new do |gem|
  gem.name          = 'openwebslides-converter'
  gem.version       = OpenWebslides::Converter::VERSION
  gem.summary       = 'Open Webslides Converter'
  gem.description   = 'Converts arbitrary content data formats to Open Webslides data format'
  gem.license       = 'MIT'
  gem.authors       = ['Florian Dejonckheere']
  gem.email         = 'florian@floriandejonckheere.be'
  gem.homepage      = 'https://rubygems.org/gems/openwebslides-converter'

  # NUL-separated output keeps paths containing whitespace intact and does
  # not rely on $INPUT_RECORD_SEPARATOR, which is only defined after
  # `require 'English'`
  gem.files         = `git ls-files -z`.split("\x0")

  # Include the tracked files of every (nested) submodule as well
  `git submodule --quiet foreach --recursive pwd`.split("\n").each do |submodule|
    # Turn the absolute submodule path into one relative to the gem root
    relative = submodule.sub("#{Dir.pwd}/", '')

    Dir.chdir(relative) do
      `git ls-files -z`.split("\x0").each do |subpath|
        gem.files << File.join(relative, subpath)
      end
    end
  end

  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  gem.add_dependency 'nokogiri', '~> 1.8'
  gem.add_dependency 'reverse_markdown', '~> 1.1'

  gem.add_development_dependency 'bundler', '~> 1.10'
  gem.add_development_dependency 'coveralls', '~> 0.8'
  gem.add_development_dependency 'rake', '~> 10.0'
  gem.add_development_dependency 'rdoc', '~> 4.0'
  gem.add_development_dependency 'rspec', '~> 3.0'
  gem.add_development_dependency 'rubocop', '~> 0.58'
  gem.add_development_dependency 'rubygems-tasks', '~> 0.2'
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
RSpec.describe Content::ContentItem do
  ##
  # Tests
  #
  # The implicit subject is expected to respond to each listed property
  #
  describe 'included properties' do
    %i[id type].each do |property|
      it { is_expected.to respond_to property }
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
RSpec.describe Content::Heading do
  ##
  # Tests
  #
  # The implicit subject is expected to respond to each listed property
  #
  describe 'included properties' do
    %i[id type text sub_item_ids].each do |property|
      it { is_expected.to respond_to property }
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
RSpec.describe Content::Paragraph do
  ##
  # Tests
  #
  # The implicit subject is expected to respond to each listed property
  #
  describe 'included properties' do
    %i[id type text sub_item_ids].each do |property|
      it { is_expected.to respond_to property }
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
RSpec.describe Content::Properties::Container do
  ##
  # Test variables
  #
  # Plain object extended with the property module under test. Identifier
  # is always mixed in first, as top of the #to_h chain.
  #
  let(:object) do
    Object.new
          .extend(Content::Properties::Identifier)
          .extend(described_class)
  end

  ##
  # Subject
  #
  subject { object }

  ##
  # Tests
  #
  describe 'included properties' do
    %i[child_item_ids to_h].each do |message|
      it { is_expected.to respond_to message }
    end

    it 'sets a default value for the property' do
      default = subject.child_item_ids

      expect(default).not_to be_nil
    end
  end

  describe 'methods' do
    describe '#to_h' do
      it 'returns a hash with the property' do
        hash = subject.to_h

        expect(hash).to include :child_item_ids
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
RSpec.describe Content::Properties::Identifier do
  ##
  # Test variables
  #
  # Plain object extended with the property module under test
  #
  let(:object) { Object.new.extend(described_class) }

  ##
  # Subject
  #
  subject { object }

  ##
  # Tests
  #
  describe 'included properties' do
    %i[id to_h].each do |message|
      it { is_expected.to respond_to message }
    end

    it 'sets a default value for the property' do
      default = subject.id

      expect(default).not_to be_nil
    end
  end

  describe 'methods' do
    describe '#to_h' do
      it 'returns a hash with the property' do
        hash = subject.to_h

        expect(hash).to include :id
      end
    end

    describe '#generate_id' do
      it 'returns a 10-digit string' do
        # generate_id is invoked through #send, once per expectation
        expect(subject.send(:generate_id)).to be_a String
        expect(subject.send(:generate_id).length).to eq 10
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
RSpec.describe Content::Properties::Metadata do
  ##
  # Test variables
  #
  # Plain object extended with the property module under test. Identifier
  # is always mixed in first, as top of the #to_h chain.
  #
  let(:object) do
    Object.new
          .extend(Content::Properties::Identifier)
          .extend(described_class)
  end

  ##
  # Subject
  #
  subject { object }

  ##
  # Tests
  #
  describe 'included properties' do
    %i[metadata to_h].each do |message|
      it { is_expected.to respond_to message }
    end

    it 'sets a default value for the property' do
      default = subject.metadata

      expect(default).not_to be_nil
    end
  end

  describe 'methods' do
    describe '#to_h' do
      it 'returns a hash with the property' do
        hash = subject.to_h

        expect(hash).to include :metadata
      end
    end
  end
end