openwebslides-converter 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +54 -0
  3. data/.rdoc_options +16 -0
  4. data/.rspec +1 -0
  5. data/.rubocop.yml +16 -0
  6. data/.travis.yml +6 -0
  7. data/Gemfile +5 -0
  8. data/LICENSE.md +20 -0
  9. data/README.md +23 -0
  10. data/Rakefile +24 -0
  11. data/lib/openwebslides/converter.rb +8 -0
  12. data/lib/openwebslides/converter/content.rb +13 -0
  13. data/lib/openwebslides/converter/content/content_item.rb +12 -0
  14. data/lib/openwebslides/converter/content/heading.rb +13 -0
  15. data/lib/openwebslides/converter/content/paragraph.rb +13 -0
  16. data/lib/openwebslides/converter/content/properties/container.rb +21 -0
  17. data/lib/openwebslides/converter/content/properties/identifier.rb +41 -0
  18. data/lib/openwebslides/converter/content/properties/metadata.rb +24 -0
  19. data/lib/openwebslides/converter/content/properties/subable.rb +21 -0
  20. data/lib/openwebslides/converter/content/properties/text.rb +21 -0
  21. data/lib/openwebslides/converter/content/properties/type.rb +21 -0
  22. data/lib/openwebslides/converter/content/root.rb +11 -0
  23. data/lib/openwebslides/converter/helpers/sanitization.rb +21 -0
  24. data/lib/openwebslides/converter/pressbooks.rb +181 -0
  25. data/lib/openwebslides/converter/result.rb +16 -0
  26. data/lib/openwebslides/converter/version.rb +7 -0
  27. data/openwebslides-converter.gemspec +43 -0
  28. data/spec/content/content_item_spec.rb +25 -0
  29. data/spec/content/heading_spec.rb +27 -0
  30. data/spec/content/paragraph_spec.rb +27 -0
  31. data/spec/content/properties/container_spec.rb +48 -0
  32. data/spec/content/properties/identifier_spec.rb +51 -0
  33. data/spec/content/properties/metadata_spec.rb +48 -0
  34. data/spec/content/properties/subable_spec.rb +48 -0
  35. data/spec/content/properties/text_spec.rb +48 -0
  36. data/spec/content/properties/type_spec.rb +48 -0
  37. data/spec/content/root_spec.rb +26 -0
  38. data/spec/converter_spec.rb +10 -0
  39. data/spec/spec_helper.rb +11 -0
  40. metadata +221 -0
@@ -0,0 +1,181 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require 'reverse_markdown'
5
+
6
+ module OpenWebslides
7
+ module Converter
8
+ class Pressbooks
9
+ include Helpers::Sanitization
10
+
11
+ attr_accessor :html,
12
+ :result
13
+
14
##
# Convert an XHTML document to Open Webslides
#
# Parses the document, fills in the metadata and collects the front matter,
# the parts and the back matter (in that order) as children of a new root item.
#
# @param [String] html XHTML document string
# @return [Result] Result object containing all data
#
def from_xhtml(html)
  @html = Nokogiri::HTML(html)
  @result = Result.new

  document_root = Content::Root.new

  parse_metadata

  # Collect the ids of the top-level items in document order
  front_id = parse_matter('front').id
  part_ids = parse_parts.map(&:id)
  back_id = parse_matter('back').id

  document_root.child_item_ids.push(front_id, *part_ids, back_id)

  # The root is appended last, after all items it refers to
  result.content_items << document_root

  result
end
36
+
37
+ protected
38
+
39
+ ##
40
+ # Find and parse metadata
41
+ #
42
+ def parse_metadata
43
+ result.title = html.at('meta[name="pb-title"]')['content']
44
+ result.author = html.at('meta[name="pb-authors"]')['content']
45
+ result.license = html.at('meta[name="pb-book-license"]')['content']
46
+ end
47
+
48
##
# Find and parse matter
#
# Creates a heading titled after the `.{position}-matter-title` element,
# attaches the paragraphs found under `.{position}-matter-ugc` to it and
# appends the heading to the result.
#
# @param [String] position 'front' or 'back'
# @return [Content::Heading] heading representing the matter section
#
def parse_matter(position)
  # Create new heading and set its title
  heading = Content::Heading.new
  heading.text = html.at(".#{position}-matter-title").content

  # The section may have a title but no body element
  body = html.at(".#{position}-matter-ugc")
  paragraphs = body ? parse_paragraphs(body) : []

  # parse_paragraph returns nil for paragraphs without text; skip those
  heading.sub_item_ids = paragraphs.compact.map(&:id)

  # Add matter heading to result
  result.content_items << heading

  heading
end
71
+
72
##
# Find and parse parts
#
# Walks all `div.part` and `div.chapter` elements in document order. Each part
# becomes a heading holding its intro paragraphs; each chapter is parsed with
# #parse_chapter and attached to the most recently seen part. A chapter that
# appears before any part has no part to attach to, so it is returned as a
# top-level heading instead.
#
# @return [Array<Content::Heading>] top-level headings in document order
#
def parse_parts
  parts = []
  top_level = []

  html.search('div.part, div.chapter').each do |div|
    css_classes = div.classes

    if css_classes.include? 'part'
      # Create new part heading and set its title
      part = Content::Heading.new
      part.text = div.at('.part-title').content

      # Part intro (paragraphs under part header); paragraphs without text
      # come back as nil and are skipped
      part.sub_item_ids = parse_paragraphs(div).compact.map(&:id)

      parts << part
      top_level << part
    elsif css_classes.include? 'chapter'
      # parse_chapter adds the chapter heading to the result itself
      chapter = parse_chapter div

      if parts.empty?
        top_level << chapter
      else
        parts.last.sub_item_ids << chapter.id
      end
    end
  end

  # Add parts to result (chapters were already added while parsing)
  result.content_items.concat parts

  top_level
end
104
+
105
##
# Parse chapter XHTML object into content item
#
# The chapter becomes a heading. Direct children of `.chapter-ugc` are handled
# by tag name: `p` and the `li` items of `ul`/`ol` become paragraphs under the
# current heading, `h4` starts a new sub-heading under the chapter. Other
# nodes are ignored, as are paragraphs and list items without text.
#
# @param [Nokogiri::XML::Node] html chapter element
# @return [Content::Heading] the chapter heading
#
def parse_chapter(html)
  # Create new chapter and set its title
  chapter = Content::Heading.new
  chapter.text = html.at('.chapter-title').content

  # Add chapter heading to result
  result.content_items << chapter

  # A chapter may consist of a title only
  body = html.at('.chapter-ugc')
  return chapter unless body

  # Start with chapter heading as current heading
  current = chapter

  body.children.each do |child|
    case child.name
    when 'p'
      paragraph = parse_paragraph child

      # parse_paragraph returns nil for paragraphs without text
      current.sub_item_ids << paragraph.id if paragraph
    when 'h4'
      section = Content::Heading.new
      section.text = child.content

      result.content_items << section

      # Sub-headings always hang off the chapter; following content goes under them
      chapter.sub_item_ids << section.id
      current = section
    when 'ul', 'ol'
      # Each list item becomes a paragraph; items without text are dropped
      items = child.search('li').map { |li| parse_paragraph li }.compact

      current.sub_item_ids.concat items.map(&:id)
    end
  end

  chapter
end
154
+
155
##
# Parse XHTML object into paragraph content items
#
# Every `p` element below the given node is parsed with #parse_paragraph.
# Paragraphs without text (for which #parse_paragraph returns nil) are left
# out, so callers can safely map over the result.
#
# @param [Nokogiri::XML::Node] html node to search for paragraphs
# @return [Array<Content::Paragraph>] parsed paragraphs, without nils
#
def parse_paragraphs(html)
  html.search('p').map { |node| parse_paragraph node }.compact
end
161
+
162
##
# Parse paragraph XHTML object into content item
#
# The node is serialized to XHTML, converted to Markdown and sanitized. A
# paragraph whose resulting text is empty is not added to the result.
#
# @param [Nokogiri::XML::Node] html paragraph (or list item) element
# @return [Content::Paragraph, nil] the paragraph, or nil when it has no text
#
def parse_paragraph(html)
  paragraph = Content::Paragraph.new

  # XHTML -> Markdown -> sanitized text
  markdown = ReverseMarkdown.convert(html.to_xhtml)
  paragraph.text = sanitize(markdown)

  return nil if paragraph.text.empty?

  result.content_items << paragraph
  paragraph
end
179
+ end
180
+ end
181
+ end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module OpenWebslides
  module Converter
    ##
    # Container for the outcome of a conversion: the document metadata and
    # the list of content items
    #
    class Result
      # Document metadata; unset (nil) on a new instance
      attr_accessor :title
      attr_accessor :author
      attr_accessor :license

      # Content items collected during conversion
      attr_accessor :content_items

      ##
      # Start out with an empty list of content items
      #
      def initialize
        self.content_items = []
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
module OpenWebslides
  module Converter
    # Gem version; read by the gemspec
    VERSION = '0.1.0'
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
lib = File.expand_path('lib', __dir__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'openwebslides/converter/version'

Gem::Specification.new do |gem|
  gem.name = 'openwebslides-converter'
  gem.version = OpenWebslides::Converter::VERSION
  gem.summary = 'Open Webslides Converter'
  gem.description = 'Converts arbitrary content data formats to Open Webslides data format'
  gem.license = 'MIT'
  gem.authors = ['Florian Dejonckheere']
  gem.email = 'florian@floriandejonckheere.be'
  gem.homepage = 'https://rubygems.org/gems/openwebslides-converter'

  # NUL-separated listing keeps paths that contain whitespace intact and does
  # not depend on the English library ($INPUT_RECORD_SEPARATOR) being loaded
  gem.files = `git ls-files -z`.split("\x0")

  # Also package the files tracked by git submodules, relative to the gem root
  `git submodule --quiet foreach --recursive pwd`.split("\n").each do |submodule_path|
    submodule = submodule_path.sub("#{Dir.pwd}/", '')

    Dir.chdir(submodule) do
      `git ls-files -z`.split("\x0").each do |subpath|
        gem.files << File.join(submodule, subpath)
      end
    end
  end

  gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  gem.add_dependency 'nokogiri', '~> 1.8'
  gem.add_dependency 'reverse_markdown', '~> 1.1'

  gem.add_development_dependency 'bundler', '~> 1.10'
  gem.add_development_dependency 'coveralls', '~> 0.8'
  gem.add_development_dependency 'rake', '~> 10.0'
  gem.add_development_dependency 'rdoc', '~> 4.0'
  gem.add_development_dependency 'rspec', '~> 3.0'
  gem.add_development_dependency 'rubocop', '~> 0.58'
  gem.add_development_dependency 'rubygems-tasks', '~> 0.2'
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
RSpec.describe Content::ContentItem do
  # Properties a content item is expected to expose
  describe 'included properties' do
    %i[id type].each do |property|
      it { is_expected.to respond_to property }
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
RSpec.describe Content::Heading do
  # Properties a heading is expected to expose
  describe 'included properties' do
    %i[id type text sub_item_ids].each do |property|
      it { is_expected.to respond_to property }
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
RSpec.describe Content::Paragraph do
  # Properties a paragraph is expected to expose
  describe 'included properties' do
    %i[id type text sub_item_ids].each do |property|
      it { is_expected.to respond_to property }
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
RSpec.describe Content::Properties::Container do
  # Bare object extended with the property under test. Identifier is always
  # included first, as top of the #to_h chain.
  let(:object) do
    Object.new.tap do |host|
      host.extend Content::Properties::Identifier
      host.extend described_class
    end
  end

  subject { object }

  describe 'included properties' do
    %i[child_item_ids to_h].each do |message|
      it { is_expected.to respond_to message }
    end

    it 'sets a default value for the property' do
      expect(subject.child_item_ids).not_to be_nil
    end
  end

  describe 'methods' do
    describe '#to_h' do
      it 'returns a hash with the property' do
        expect(subject.to_h).to include :child_item_ids
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
RSpec.describe Content::Properties::Identifier do
  # Bare object extended with the property under test
  # (Object#extend returns the extended object itself)
  let(:object) { Object.new.extend(described_class) }

  subject { object }

  describe 'included properties' do
    %i[id to_h].each do |message|
      it { is_expected.to respond_to message }
    end

    it 'sets a default value for the property' do
      expect(subject.id).not_to be_nil
    end
  end

  describe 'methods' do
    describe '#to_h' do
      it 'returns a hash with the property' do
        expect(subject.to_h).to include :id
      end
    end

    describe '#generate_id' do
      # Called through #send, as the original spec does
      it 'returns a 10-digit string' do
        expect(subject.send(:generate_id)).to be_a String
        expect(subject.send(:generate_id).length).to eq 10
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'spec_helper'
4
+
5
RSpec.describe Content::Properties::Metadata do
  # Bare object extended with the property under test. Identifier is always
  # included first, as top of the #to_h chain.
  let(:object) do
    Object.new.tap do |host|
      host.extend Content::Properties::Identifier
      host.extend described_class
    end
  end

  subject { object }

  describe 'included properties' do
    %i[metadata to_h].each do |message|
      it { is_expected.to respond_to message }
    end

    it 'sets a default value for the property' do
      expect(subject.metadata).not_to be_nil
    end
  end

  describe 'methods' do
    describe '#to_h' do
      it 'returns a hash with the property' do
        expect(subject.to_h).to include :metadata
      end
    end
  end
end