contentful_converter 0.0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 9bcee9dc48afd4c917fb46ba299eb5cad6030fac447ba86de0ae51ad03eef629
4
+ data.tar.gz: e51cd46ed8ec8eed058d2461d78c06f0de7128aaac5a7d867c4992d4cea41c88
5
+ SHA512:
6
+ metadata.gz: f6d89e89940edeca060d2c2eab090b3518e87a7282cca6eb27f77978c400935cf40a34a8e355348032b624b936fded51206dfda82ab517acd9130ee1930b231c
7
+ data.tar.gz: c0c490af7c05434c146f198909e367f8ef861bdd87c126b77906b7e721262549025e39bb4239d73bac87a9af716978446a5b7dd5d283b32be89bbc1710e91a36
data/README.md ADDED
@@ -0,0 +1,84 @@
1
+ ## Contentful HTML to Rich Text converter
2
+
3
+ [![Build Status](https://travis-ci.org/AlexAvlonitis/contentful_converter.svg?branch=master)](https://travis-ci.org/AlexAvlonitis/contentful_converter)
4
+
5
+ Converts a plain HTML string into the Contentful-specific rich text hash structure.
6
+
7
+ *WIP, does not cover all html elements, contributions are welcome*
8
+
9
+ ### Install
10
+ ```ruby
11
+ # Rails
12
+ gem 'contentful_converter'
13
+
14
+ # Ruby
15
+ gem install 'contentful_converter'
16
+
17
+ require 'contentful_converter'
18
+
19
+ ```
20
+
21
+ ### Run
22
+
23
+ ```ruby
24
+ ContentfulConverter.convert('<h3>hello world</h3>')
25
+
26
+ # OUTPUT
27
+ {
28
+ :nodeType=>"document",
29
+ :data=>{},
30
+ :content=>[
31
+ {
32
+ :nodeType=>"heading-3",
33
+ :data=>{},
34
+ :content=>[
35
+ {
36
+ :marks=>[],
37
+ :value=>"hello world",
38
+ :nodeType=>"text",
39
+ :data=>{}
40
+ }
41
+ ]
42
+ }
43
+ ]
44
+ }
45
+ ```
46
+
47
+ ### Additional info
48
+ **HREF links**
49
+
50
+ * HTML hyperlinks with full URL e.g: (https://google.com), will be converted into URL hyperlinks
51
+
52
+ * HTML hyperlinks without a scheme e.g: ('/aboutus/contact'), will be converted into ENTRY hyperlinks, with the href value as an ID
53
+
54
+ * HTML hyperlinks without a scheme but with an extension e.g: ('myfile.docx'), will be converted into ASSET hyperlinks, with the href value as an ID, minus the extension.
55
+
56
+ ---
57
+
58
+ ### Tests
59
+ ```ruby
60
+ # Unit tests
61
+ rspec
62
+
63
+ # Feature tests
64
+ rspec ./spec/features/*
65
+ ```
66
+
67
+ ### Contributions
68
+ * Fork it
69
+ * Create a branch
70
+ * Add your changes and tests
71
+ * Submit a PR
72
+
73
+ ### License
74
+
75
+ Copyright (C) 2019 Alex Avlonitis
76
+
77
+ This program is free software: you can redistribute it and/or modify
78
+ it under the terms of the GNU General Public License as published by
79
+ the Free Software Foundation, version 3.
80
+
81
+ This program is distributed in the hope that it will be useful,
82
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
83
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
84
+ GNU General Public License for more details.
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require 'contentful_converter/tree_cloner'
5
+ require 'contentful_converter/nokogiri_builder'
6
+
7
module ContentfulConverter
  # Turns an HTML string into a rich text structure by chaining
  # NokogiriBuilder (HTML -> fragment) and TreeCloner (fragment -> rich text).
  class Converter
    # Converts +html+ into rich text.
    #
    # @param html [String] the HTML to convert
    # @return the value produced by TreeCloner.nokogiri_to_rich_text
    # @raise [ArgumentError] when +html+ is not a String
    def self.convert(html)
      raise_error_unless_string(html)

      fragment = nokogiri_fragment(html)
      convert_to_rich_text(fragment)
    end

    # Rejects anything that is not a String before any parsing happens.
    def self.raise_error_unless_string(param)
      unless param.is_a?(String)
        raise ArgumentError, 'Converter param needs to be a string'
      end
    end

    # Delegates the fragment -> rich text step to TreeCloner.
    def self.convert_to_rich_text(nokogiri_fragment)
      TreeCloner.nokogiri_to_rich_text(nokogiri_fragment)
    end

    # Delegates the HTML -> fragment step to NokogiriBuilder.
    def self.nokogiri_fragment(html)
      NokogiriBuilder.build(html)
    end

    private_class_method :raise_error_unless_string,
                         :convert_to_rich_text,
                         :nokogiri_fragment
  end
end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/document'
4
+ require 'contentful_converter/nodes/header'
5
+ require 'contentful_converter/nodes/paragraph'
6
+ require 'contentful_converter/nodes/blockquote'
7
+ require 'contentful_converter/nodes/text'
8
+ require 'contentful_converter/nodes/underline'
9
+ require 'contentful_converter/nodes/italic'
10
+ require 'contentful_converter/nodes/strong'
11
+ require 'contentful_converter/nodes/code'
12
+ require 'contentful_converter/nodes/ordered_list'
13
+ require 'contentful_converter/nodes/unordered_list'
14
+ require 'contentful_converter/nodes/horizontal_line'
15
+ require 'contentful_converter/nodes/list_item'
16
+ require 'contentful_converter/nodes/hyperlink'
17
+
18
module ContentfulConverter
  # Picks the rich text node class that corresponds to a parsed HTML node.
  class NodeBuilder
    # Lookup table keyed by the parsed node's name; the value is the node
    # class instantiated for it.
    DEFAULT_MAPPINGS = {
      '#document-fragment' => Nodes::Document,
      'h1' => Nodes::Header,
      'h2' => Nodes::Header,
      'h3' => Nodes::Header,
      'h4' => Nodes::Header,
      'h5' => Nodes::Header,
      'h6' => Nodes::Header,
      'text' => Nodes::Text,
      'i' => Nodes::Italic,
      'em' => Nodes::Italic,
      'u' => Nodes::Underline,
      'b' => Nodes::Strong,
      'code' => Nodes::Code,
      'strong' => Nodes::Strong,
      'p' => Nodes::Paragraph,
      'div' => Nodes::Paragraph,
      'br' => Nodes::Paragraph,
      'section' => Nodes::Paragraph,
      'hr' => Nodes::HorizontalLine,
      'blockquote' => Nodes::Blockquote,
      'ul' => Nodes::UnorderedList,
      'ol' => Nodes::OrderedList,
      'li' => Nodes::ListItem,
      'a' => Nodes::Hyperlink
    }.freeze

    # Instantiates the mapped node class for +nokogiri_node+.
    #
    # @param nokogiri_node the parsed node; only its +name+ is read here
    # @param parent the rich text node that will own the new node, if any
    # @return an instance of the mapped class, built with (nokogiri_node, parent)
    # @raise [RuntimeError] when the node name has no entry in DEFAULT_MAPPINGS
    def self.build(nokogiri_node, parent = nil)
      node_class = DEFAULT_MAPPINGS.fetch(nokogiri_node.name) do
        raise "'#{nokogiri_node.name}' Node type, does not exist"
      end

      node_class.new(nokogiri_node, parent)
    end
  end
end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
module ContentfulConverter
  module Nodes
    # Shared behaviour for rich text nodes. Subclasses must define the
    # private +type+ method; it is called from the constructor, so
    # instantiating Base itself raises NotImplementedError.
    class Base
      attr_reader :node_type, :content, :nokogiri_node, :parent

      # @param nokogiri_node the source node this rich text node mirrors (may be nil)
      # @param parent the owning rich text node (nil for a root)
      def initialize(nokogiri_node = nil, parent = nil)
        @nokogiri_node = nokogiri_node
        @parent = parent
        @node_type = type
        @content = []
      end

      # Appends +node+ to this node's children and returns the children array.
      def add_content(node)
        content << node
      end

      # Serialises this node and its children into a hash.
      #
      # +params+ is filled in place and returned (subclasses rely on that):
      # :nodeType is set, :data defaults to an empty hash, and :content
      # becomes the children's own +to_h+ results with nils removed.
      def to_h(params = options)
        serialised_children = content.map { |child| child.to_h }.compact

        params[:nodeType] = node_type
        params[:data] ||= {}
        params[:content] = serialised_children
        params
      end

      # Returns false for a node without a parent and for a node whose parent
      # is exactly a Header, Paragraph or Hyperlink; true otherwise.
      def needs_p_wrapping?
        return false if parent.nil?

        parent_class = parent.class
        exempt = parent_class == Nodes::Header ||
                 parent_class == Nodes::Paragraph ||
                 parent_class == Nodes::Hyperlink
        !exempt
      end

      private

      # Text content of the underlying source node.
      def value
        nokogiri_node.content
      end

      # Node type name; must be provided by each subclass.
      def type
        raise NotImplementedError
      end

      # Initial hash handed to +to_h+; empty unless a subclass overrides it.
      def options
        {}
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/base'
4
+
5
module ContentfulConverter
  module Nodes
    # Rich text node whose node type is 'blockquote'.
    class Blockquote < Base
      # A blockquote is never wrapped in a paragraph.
      def needs_p_wrapping?
        false
      end

      # Node type name stored in +node_type+ by the Base constructor.
      private def type
        'blockquote'
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/text'
4
+
5
module ContentfulConverter
  module Nodes
    # Text node that carries the 'code' mark.
    class Code < Text
      # Mark names applied to this text; Text turns each one into a
      # { type: name } hash when building its options.
      def marks
        ['code']
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/base'
4
+
5
module ContentfulConverter
  module Nodes
    # Rich text node whose node type is 'document'. It keeps Base's default
    # paragraph-wrapping rule.
    class Document < Base
      # Node type name stored in +node_type+ by the Base constructor.
      private def type
        'document'
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/base'
4
+
5
module ContentfulConverter
  module Nodes
    # Heading node; the level is taken from the source node's name.
    class Header < Base
      # A heading is never wrapped in a paragraph.
      def needs_p_wrapping?
        false
      end

      # Builds "heading-<suffix>", where the suffix is whatever follows the
      # last 'h' in the source node's name (e.g. 'h3' -> 'heading-3').
      private def type
        level = nokogiri_node.name.split('h').last
        "heading-#{level}"
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/base'
4
+
5
module ContentfulConverter
  module Nodes
    # Rich text node whose node type is 'hr'.
    class HorizontalLine < Base
      # A horizontal line is never wrapped in a paragraph.
      def needs_p_wrapping?
        false
      end

      # Node type name stored in +node_type+ by the Base constructor.
      private def type
        'hr'
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/base'
4
+ require 'uri'
5
+
6
module ContentfulConverter
  module Nodes
    # Link node built from an element's +href+ attribute.
    #
    # Classification:
    # * href with a URI scheme                 -> 'hyperlink' (data.uri)
    # * no scheme, path ends in a file extension -> 'asset-hyperlink'
    # * no scheme, no file extension             -> 'entry-hyperlink'
    #
    # The extension is read from the last segment of the URI path only, so
    # dots elsewhere ('./page', '/v1.2/page', '/.well-known') do not turn an
    # entry link into an asset link.
    #
    # NOTE: URI() raises URI::InvalidURIError for an href it cannot parse;
    # that error is not rescued here.
    class Hyperlink < Base
      private

      # Node type name, derived from the href (see the class comment).
      def type
        return 'hyperlink' if uri_scheme?

        uri_extension? ? 'asset-hyperlink' : 'entry-hyperlink'
      end

      # Initial hash for +to_h+: a plain URI for links with a scheme,
      # otherwise a link target of type Asset or Entry.
      def options
        return hyperlink_option if uri_scheme?

        hyperlink_entry_option(uri_extension? ? 'Asset' : 'Entry')
      end

      def hyperlink_option
        { data: { uri: parsed_href.to_s } }
      end

      # @param type [String] the link type written to sys.linkType
      def hyperlink_entry_option(type)
        {
          data: {
            target: {
              sys: {
                id: link_id,
                type: 'Link',
                linkType: type
              }
            }
          }
        }
      end

      # Id used for Asset/Entry links: the URI path without its file
      # extension for assets, the whole href for entries. Returns nil when
      # that leaves nothing (e.g. a missing href).
      def link_id
        id = uri_extension? ? href_path[0...-extension.length] : parsed_href.to_s
        id.empty? ? nil : id
      end

      # The scheme of the href, or nil when it has none.
      def uri_scheme?
        parsed_href.scheme
      end

      # True when the last path segment ends in a non-empty extension.
      def uri_extension?
        # A bare trailing dot ("file.") is not an extension.
        extension.length > 1
      end

      # File extension of the last path segment, including the leading dot;
      # an empty string when there is none.
      def extension
        File.extname(href_path)
      end

      # Path component of the href; empty when the URI has no path.
      def href_path
        parsed_href.path.to_s
      end

      # The href parsed as a URI; an empty URI when the attribute is absent.
      def parsed_href
        URI(href_value || '')
      end

      def href_value
        nokogiri_node['href']
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/text'
4
+
5
module ContentfulConverter
  module Nodes
    # Text node that carries the 'italic' mark.
    class Italic < Text
      # Mark names applied to this text; Text turns each one into a
      # { type: name } hash when building its options.
      def marks
        ['italic']
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/base'
4
+
5
module ContentfulConverter
  module Nodes
    # Rich text node whose node type is 'list-item'.
    class ListItem < Base
      # A list item is never wrapped in a paragraph.
      def needs_p_wrapping?
        false
      end

      # Node type name stored in +node_type+ by the Base constructor.
      private def type
        'list-item'
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/base'
4
+
5
module ContentfulConverter
  module Nodes
    # Rich text node whose node type is 'ordered-list'.
    class OrderedList < Base
      # An ordered list is never wrapped in a paragraph.
      def needs_p_wrapping?
        false
      end

      # Node type name stored in +node_type+ by the Base constructor.
      private def type
        'ordered-list'
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/base'
4
+
5
module ContentfulConverter
  module Nodes
    # Rich text node whose node type is 'paragraph'.
    class Paragraph < Base
      # A paragraph is never wrapped in another paragraph.
      def needs_p_wrapping?
        false
      end

      # Node type name stored in +node_type+ by the Base constructor.
      private def type
        'paragraph'
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/text'
4
+
5
module ContentfulConverter
  module Nodes
    # Text node that carries the 'bold' mark.
    class Strong < Text
      # Mark names applied to this text; Text turns each one into a
      # { type: name } hash when building its options.
      def marks
        ['bold']
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/base'
4
+
5
module ContentfulConverter
  module Nodes
    # Leaf text node: serialises to a hash with :value and :marks and
    # without a :content key.
    class Text < Base
      # Builds the hash via Base#to_h, then drops the :content key that
      # Base adds. The same +params+ object is returned.
      def to_h(params = options)
        super.tap { |hash| hash.delete(:content) }
      end

      private

      # Node type name stored in +node_type+ by the Base constructor.
      def type
        'text'
      end

      # Initial hash for +to_h+: the source node's text plus one
      # { type: name } hash per mark.
      def options
        text = value
        mark_hashes = marks.map { |name| { type: name } }

        { value: text, marks: mark_hashes }
      end

      # Mark names for this node; plain text has none.
      def marks
        []
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/text'
4
+
5
module ContentfulConverter
  module Nodes
    # Text node that carries the 'underline' mark.
    class Underline < Text
      # Mark names applied to this text; Text turns each one into a
      # { type: name } hash when building its options.
      def marks
        ['underline']
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/base'
4
+
5
module ContentfulConverter
  module Nodes
    # Rich text node whose node type is 'unordered-list'.
    class UnorderedList < Base
      # An unordered list is never wrapped in a paragraph.
      def needs_p_wrapping?
        false
      end

      # Node type name stored in +node_type+ by the Base constructor.
      private def type
        'unordered-list'
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+
5
module ContentfulConverter
  # Parses an HTML string into a Nokogiri fragment, after rewriting
  # <div>/<section> tags to <p> and wrapping stray <li> elements in a <ul>.
  class NokogiriBuilder
    # Matches the "<div" / "</div" / "<section" / "</section" part of a tag.
    # The lookahead requires whitespace, "/" or ">" right after the name, so
    # attributes are left in place and longer names such as <divider> are
    # not touched. Group 1 captures the optional closing slash.
    BLOCK_TAG = %r{<(/?)(?:div|section)(?=[\s/>])}i.freeze

    class << self
      # @param html [String] raw HTML
      # @return the parsed fragment returned by Nokogiri::HTML.fragment,
      #   with list items normalised
      def build(html)
        normalize_lists(Nokogiri::HTML.fragment(sanitize(html)))
      end

      private

      # Returns a copy of +html+ in which every opening and closing
      # <div>/<section> tag is renamed to <p>. Matching on the tag name
      # rather than on the literal text 'div>' keeps tags that carry
      # attributes balanced: <div class="x">…</div> becomes
      # <p class="x">…</p>. The argument itself is never modified.
      def sanitize(html)
        html.gsub(BLOCK_TAG, '<\1p')
      end

      # Wraps every <li> that is not already inside a <ul>/<ol> and returns
      # the same fragment. A fragment without <li> elements is unchanged.
      def normalize_lists(nokogiri_fragment)
        find_li(nokogiri_fragment).each { |li_node| wrap_parents_in_ul(li_node) }

        nokogiri_fragment
      end

      def find_li(nokogiri_fragment)
        nokogiri_fragment.css('li')
      end

      # Wraps +node+ in a <ul> unless its direct parent is a <ul> or <ol>.
      def wrap_parents_in_ul(node)
        return if %w[ul ol].include?(node.parent.name)

        node.wrap('<ul>')
      end
    end
  end
end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/node_builder'
4
+
5
module ContentfulConverter
  # Mirrors a parsed node tree as a tree of rich text nodes and returns the
  # root's hash form.
  class TreeCloner
    class << self
      # @param nokogiri_fragment root of the parsed tree; must respond to
      #   +children+ and be accepted by NodeBuilder.build
      # @return the +to_h+ of the rich text root node
      def nokogiri_to_rich_text(nokogiri_fragment)
        root = if nokogiri_fragment.children.empty?
                 NodeBuilder.build(nokogiri_fragment)
               else
                 traverse_and_clone(nokogiri_fragment)
               end

        root.to_h
      end

      private

      # Walks the source tree iteratively. Each stack entry pairs a source
      # node with the rich text node already built for it; every child is
      # built, pushed for later expansion and attached to its parent
      # (wrapped in a paragraph when it asks for that).
      def traverse_and_clone(nokogiri_fragment)
        rich_root_node = NodeBuilder.build(nokogiri_fragment)
        pending = [[nokogiri_fragment, rich_root_node]]

        until pending.empty?
          noko_parent, rich_parent = pending.pop

          noko_parent.children.each do |noko_child|
            rich_child = NodeBuilder.build(noko_child, rich_parent)

            pending.push([noko_child, rich_child])
            rich_parent.add_content(wrap_in_paragraph(rich_child))
          end
        end

        rich_root_node
      end

      # Returns +node+ itself, or a new Paragraph (sharing the node's
      # parent) that contains it when +node.needs_p_wrapping?+ is true.
      def wrap_in_paragraph(node)
        return node unless node.needs_p_wrapping?

        Nodes::Paragraph.new(nil, node.parent).tap do |paragraph|
          paragraph.add_content(node)
        end
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module ContentfulConverter
  # Version string of this gem release.
  VERSION = '0.0.1.12'
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/converter'
4
+
5
module ContentfulConverter
  class << self
    # Public entry point of the gem; forwards +html+ to Converter.convert
    # and returns its result unchanged.
    def convert(html)
      Converter.convert(html)
    end
  end
end
metadata ADDED
@@ -0,0 +1,92 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: contentful_converter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1.12
5
+ platform: ruby
6
+ authors:
7
+ - Alex Avlonitis
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-12-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.9'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.9'
41
+ description: Converts HTML text to Rich Text Contentful specific JSON structure
42
+ email:
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files: []
46
+ files:
47
+ - README.md
48
+ - lib/contentful_converter.rb
49
+ - lib/contentful_converter/converter.rb
50
+ - lib/contentful_converter/node_builder.rb
51
+ - lib/contentful_converter/nodes/base.rb
52
+ - lib/contentful_converter/nodes/blockquote.rb
53
+ - lib/contentful_converter/nodes/code.rb
54
+ - lib/contentful_converter/nodes/document.rb
55
+ - lib/contentful_converter/nodes/header.rb
56
+ - lib/contentful_converter/nodes/horizontal_line.rb
57
+ - lib/contentful_converter/nodes/hyperlink.rb
58
+ - lib/contentful_converter/nodes/italic.rb
59
+ - lib/contentful_converter/nodes/list_item.rb
60
+ - lib/contentful_converter/nodes/ordered_list.rb
61
+ - lib/contentful_converter/nodes/paragraph.rb
62
+ - lib/contentful_converter/nodes/strong.rb
63
+ - lib/contentful_converter/nodes/text.rb
64
+ - lib/contentful_converter/nodes/underline.rb
65
+ - lib/contentful_converter/nodes/unordered_list.rb
66
+ - lib/contentful_converter/nokogiri_builder.rb
67
+ - lib/contentful_converter/tree_cloner.rb
68
+ - lib/contentful_converter/version.rb
69
+ homepage: https://github.com/AlexAvlonitis/contentful_converter
70
+ licenses:
71
+ - MIT
72
+ metadata: {}
73
+ post_install_message:
74
+ rdoc_options: []
75
+ require_paths:
76
+ - lib
77
+ required_ruby_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ required_rubygems_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ requirements: []
88
+ rubygems_version: 3.0.3
89
+ signing_key:
90
+ specification_version: 4
91
+ summary: Contentful HTML to Rich Text Converter
92
+ test_files: []