contentful_converter 0.0.1.12

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 9bcee9dc48afd4c917fb46ba299eb5cad6030fac447ba86de0ae51ad03eef629
4
+ data.tar.gz: e51cd46ed8ec8eed058d2461d78c06f0de7128aaac5a7d867c4992d4cea41c88
5
+ SHA512:
6
+ metadata.gz: f6d89e89940edeca060d2c2eab090b3518e87a7282cca6eb27f77978c400935cf40a34a8e355348032b624b936fded51206dfda82ab517acd9130ee1930b231c
7
+ data.tar.gz: c0c490af7c05434c146f198909e367f8ef861bdd87c126b77906b7e721262549025e39bb4239d73bac87a9af716978446a5b7dd5d283b32be89bbc1710e91a36
data/README.md ADDED
@@ -0,0 +1,84 @@
1
+ ## Contentful HTML to Rich Text converter
2
+
3
+ [![Build Status](https://travis-ci.org/AlexAvlonitis/contentful_converter.svg?branch=master)](https://travis-ci.org/AlexAvlonitis/contentful_converter)
4
+
5
+ Converts a plain HTML string into Contentful's rich text hash structure.
6
+
7
+ *WIP: does not yet cover all HTML elements; contributions are welcome.*
8
+
9
+ ### Install
10
+ ```ruby
11
+ # Rails
12
+ gem 'contentful_converter'
13
+
14
+ # Ruby
15
+ gem install 'contentful_converter'
16
+
17
+ require 'contentful_converter'
18
+
19
+ ```
20
+
21
+ ### Run
22
+
23
+ ```ruby
24
+ ContentfulConverter.convert('<h3>hello world</h3>')
25
+
26
+ # OUTPUT
27
+ {
28
+ :nodeType=>"document",
29
+ :data=>{},
30
+ :content=>[
31
+ {
32
+ :nodeType=>"heading-3",
33
+ :data=>{},
34
+ :content=>[
35
+ {
36
+ :marks=>[],
37
+ :value=>"hello world",
38
+ :nodeType=>"text",
39
+ :data=>{}
40
+ }
41
+ ]
42
+ }
43
+ ]
44
+ }
45
+ ```
46
+
47
+ ### Additional info
48
+ **HREF links**
49
+
50
+ * HTML hyperlinks with full URL e.g: (https://google.com), will be converted into URL hyperlinks
51
+
52
+ * HTML hyperlinks without a scheme e.g: ('/aboutus/contact'), will be converted into ENTRY hyperlinks, with the href value as an ID
53
+
54
+ * HTML hyperlinks without a scheme but with an extension e.g: ('myfile.docx'), will be converted into ASSET hyperlinks, with the href value as an ID, minus the extension.
55
+
56
+ ---
57
+
58
+ ### Tests
59
+ ```ruby
60
+ # Unit tests
61
+ rspec
62
+
63
+ # Feature tests
64
+ rspec ./spec/features/*
65
+ ```
66
+
67
+ ### Contributions
68
+ * Fork it
69
+ * Create a branch
70
+ * Add your changes and tests
71
+ * Submit a PR
72
+
73
+ ### License
74
+
75
+ Copyright (C) 2019 Alex Avlonitis
76
+
77
+ This program is free software: you can redistribute it and/or modify
78
+ it under the terms of the GNU General Public License as published by
79
+ the Free Software Foundation, version 3.
80
+
81
+ This program is distributed in the hope that it will be useful,
82
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
83
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
84
+ GNU General Public License for more details.
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require 'contentful_converter/tree_cloner'
5
+ require 'contentful_converter/nokogiri_builder'
6
+
7
module ContentfulConverter
  # Entry point that turns an HTML string into a rich text hash.
  class Converter
    # Converts +html+ into a rich text hash.
    #
    # The string is handed to NokogiriBuilder.build and the resulting fragment
    # is passed to TreeCloner.nokogiri_to_rich_text, whose result is returned.
    #
    # @param html [String] the HTML to convert
    # @raise [ArgumentError] when +html+ is not a String
    def self.convert(html)
      unless html.is_a?(String)
        raise ArgumentError, 'Converter param needs to be a string'
      end

      fragment = NokogiriBuilder.build(html)
      TreeCloner.nokogiri_to_rich_text(fragment)
    end
  end
end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/document'
4
+ require 'contentful_converter/nodes/header'
5
+ require 'contentful_converter/nodes/paragraph'
6
+ require 'contentful_converter/nodes/blockquote'
7
+ require 'contentful_converter/nodes/text'
8
+ require 'contentful_converter/nodes/underline'
9
+ require 'contentful_converter/nodes/italic'
10
+ require 'contentful_converter/nodes/strong'
11
+ require 'contentful_converter/nodes/code'
12
+ require 'contentful_converter/nodes/ordered_list'
13
+ require 'contentful_converter/nodes/unordered_list'
14
+ require 'contentful_converter/nodes/horizontal_line'
15
+ require 'contentful_converter/nodes/list_item'
16
+ require 'contentful_converter/nodes/hyperlink'
17
+
18
module ContentfulConverter
  # Picks the rich text node class for a Nokogiri node and instantiates it.
  class NodeBuilder
    # Nokogiri node name => rich text node class.
    DEFAULT_MAPPINGS = {
      # root fragment
      '#document-fragment' => Nodes::Document,
      # headings
      'h1' => Nodes::Header,
      'h2' => Nodes::Header,
      'h3' => Nodes::Header,
      'h4' => Nodes::Header,
      'h5' => Nodes::Header,
      'h6' => Nodes::Header,
      # text and marks
      'text' => Nodes::Text,
      'i' => Nodes::Italic,
      'em' => Nodes::Italic,
      'u' => Nodes::Underline,
      'b' => Nodes::Strong,
      'code' => Nodes::Code,
      'strong' => Nodes::Strong,
      # paragraph-like containers
      'p' => Nodes::Paragraph,
      'div' => Nodes::Paragraph,
      'br' => Nodes::Paragraph,
      'section' => Nodes::Paragraph,
      # remaining block elements
      'hr' => Nodes::HorizontalLine,
      'blockquote' => Nodes::Blockquote,
      'ul' => Nodes::UnorderedList,
      'ol' => Nodes::OrderedList,
      'li' => Nodes::ListItem,
      # links
      'a' => Nodes::Hyperlink
    }.freeze

    # Builds the rich text node mapped to +nokogiri_node.name+.
    #
    # @param nokogiri_node [#name] the source node
    # @param parent [Object, nil] the rich text parent handed to the new node
    # @raise [RuntimeError] when the node name has no mapping
    def self.build(nokogiri_node, parent = nil)
      node_name = nokogiri_node.name
      node_class = DEFAULT_MAPPINGS.fetch(node_name) do
        raise "'#{node_name}' Node type, does not exist"
      end

      node_class.new(nokogiri_node, parent)
    end
  end
end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
module ContentfulConverter
  module Nodes
    # Shared behaviour of all rich text nodes. Subclasses must provide the
    # private #type method; #initialize calls it to populate #node_type.
    class Base
      attr_reader :node_type, :content, :nokogiri_node, :parent

      # @param nokogiri_node [Object, nil] the source node this node mirrors
      # @param parent [Base, nil] the rich text parent, if any
      # @raise [NotImplementedError] when the subclass does not define #type
      def initialize(nokogiri_node = nil, parent = nil)
        @nokogiri_node = nokogiri_node
        @parent = parent
        @node_type = type
        @content = []
      end

      # Appends +node+ to this node's children.
      def add_content(node)
        @content << node
      end

      # Serialises the node and its children into a hash.
      #
      # +params+ is modified in place and returned; Text#to_h depends on
      # receiving the very same object back.
      #
      # @param params [Hash] seed hash, defaults to the node's #options
      # @return [Hash] +params+ with :nodeType, :data and :content set
      def to_h(params = options)
        params[:nodeType] = node_type
        params[:data] ||= {}
        params[:content] = content.map { |child| child.to_h }.compact
        params
      end

      # Whether the node should be wrapped in a paragraph: false for nodes
      # without a parent or whose parent is exactly a Header, Paragraph or
      # Hyperlink; true otherwise.
      def needs_p_wrapping?
        return false if parent.nil?

        parent_class = parent.class
        !(parent_class == Nodes::Header ||
          parent_class == Nodes::Paragraph ||
          parent_class == Nodes::Hyperlink)
      end

      private

      # Text content of the underlying source node.
      def value
        nokogiri_node.content
      end

      # Node type string; must be implemented by subclasses.
      def type
        raise NotImplementedError
      end

      # Seed hash for #to_h; empty unless a subclass overrides it.
      def options
        {}
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/base'
4
+
5
module ContentfulConverter
  module Nodes
    # Rich text node whose node type is 'blockquote'.
    class Blockquote < Base
      # Always false: this node is never flagged for paragraph wrapping.
      def needs_p_wrapping?
        false
      end

      # Node type string stored in #node_type by Base#initialize.
      private def type
        'blockquote'
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/text'
4
+
5
module ContentfulConverter
  module Nodes
    # Text node that carries the 'code' mark.
    class Code < Text
      # Mark names that Text#options turns into { type: mark } hashes.
      #
      # @return [Array<String>]
      def marks
        ['code']
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/base'
4
+
5
module ContentfulConverter
  module Nodes
    # Rich text node whose node type is 'document'; NodeBuilder maps the
    # '#document-fragment' node name to it.
    class Document < Base
      # Node type string stored in #node_type by Base#initialize.
      private def type
        'document'
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/base'
4
+
5
module ContentfulConverter
  module Nodes
    # Rich text heading node; the level is taken from the source tag name.
    class Header < Base
      # Always false: this node is never flagged for paragraph wrapping.
      def needs_p_wrapping?
        false
      end

      private

      # Builds "heading-<n>" from whatever follows the last 'h' in the source
      # node name (e.g. 'h3' => 'heading-3').
      def type
        level = nokogiri_node.name.split('h').last
        "heading-#{level}"
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/base'
4
+
5
module ContentfulConverter
  module Nodes
    # Rich text node whose node type is 'hr'.
    class HorizontalLine < Base
      # Always false: this node is never flagged for paragraph wrapping.
      def needs_p_wrapping?
        false
      end

      # Node type string stored in #node_type by Base#initialize.
      private def type
        'hr'
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/base'
4
+ require 'uri'
5
+
6
module ContentfulConverter
  module Nodes
    # Rich text node for a link. The href of the source node decides the kind:
    #
    # * href with a URI scheme            => 'hyperlink'       (data.uri)
    # * no scheme, has a file extension   => 'asset-hyperlink' (data.target)
    # * no scheme, no file extension      => 'entry-hyperlink' (data.target)
    #
    # For asset/entry links the target id is the href minus its extension.
    class Hyperlink < Base
      private

      # Node type string stored in #node_type by Base#initialize.
      def type
        return 'hyperlink' if uri_scheme?

        uri_extension? ? 'asset-hyperlink' : 'entry-hyperlink'
      end

      # Seed hash for Base#to_h carrying the link data.
      def options
        return hyperlink_option if uri_scheme?

        hyperlink_entry_option(uri_extension? ? 'Asset' : 'Entry')
      end

      def hyperlink_option
        { data: { uri: parsed_href.to_s } }
      end

      # @param type [String] link type, 'Asset' or 'Entry'
      def hyperlink_entry_option(type)
        {
          data: {
            target: {
              sys: {
                id: href_without_extension,
                type: 'Link',
                linkType: type
              }
            }
          }
        }
      end

      # @return [Boolean] true when the href has a URI scheme
      def uri_scheme?
        !parsed_href.scheme.nil?
      end

      # @return [Boolean] true when the href ends in a file extension
      def uri_extension?
        !href_extension.empty?
      end

      # Extension of the href including the leading dot, or '' when absent.
      # File.extname only looks at the last path segment, so dots in
      # directories ('../about', '/v1.0/docs') are not taken for extensions,
      # which splitting the whole href on '.' used to do.
      def href_extension
        extension = File.extname(parsed_href.to_s)
        # Some Ruby versions report '.' for a trailing dot; not an extension.
        extension == '.' ? '' : extension
      end

      # The href with only the trailing extension removed, so that
      # 'my.file.docx' yields 'my.file' rather than 'my'.
      def href_without_extension
        href = parsed_href.to_s
        href[0, href.length - href_extension.length]
      end

      # Parsed href; an empty URI when the source node has no href.
      # URI() raises URI::InvalidURIError for hrefs it cannot parse.
      def parsed_href
        URI(href_value || '')
      end

      def href_value
        nokogiri_node['href']
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/text'
4
+
5
module ContentfulConverter
  module Nodes
    # Text node that carries the 'italic' mark.
    class Italic < Text
      # Mark names that Text#options turns into { type: mark } hashes.
      #
      # @return [Array<String>]
      def marks
        ['italic']
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/base'
4
+
5
module ContentfulConverter
  module Nodes
    # Rich text node whose node type is 'list-item'.
    class ListItem < Base
      # Always false: this node is never flagged for paragraph wrapping.
      def needs_p_wrapping?
        false
      end

      # Node type string stored in #node_type by Base#initialize.
      private def type
        'list-item'
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/base'
4
+
5
module ContentfulConverter
  module Nodes
    # Rich text node whose node type is 'ordered-list'.
    class OrderedList < Base
      # Always false: this node is never flagged for paragraph wrapping.
      def needs_p_wrapping?
        false
      end

      # Node type string stored in #node_type by Base#initialize.
      private def type
        'ordered-list'
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/base'
4
+
5
module ContentfulConverter
  module Nodes
    # Rich text node whose node type is 'paragraph'.
    class Paragraph < Base
      # Always false: a paragraph is never wrapped in another paragraph.
      def needs_p_wrapping?
        false
      end

      # Node type string stored in #node_type by Base#initialize.
      private def type
        'paragraph'
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/text'
4
+
5
module ContentfulConverter
  module Nodes
    # Text node that carries the 'bold' mark.
    class Strong < Text
      # Mark names that Text#options turns into { type: mark } hashes.
      #
      # @return [Array<String>]
      def marks
        ['bold']
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/base'
4
+
5
module ContentfulConverter
  module Nodes
    # Leaf rich text node holding a text value and its marks.
    class Text < Base
      # Serialises like Base#to_h, then drops the :content key.
      #
      # @param params [Hash] seed hash, defaults to #options; modified in place
      # @return [Hash] +params+ without :content
      def to_h(params = options)
        super(params)
        params.tap { |hash| hash.delete(:content) }
      end

      private

      # Node type string stored in #node_type by Base#initialize.
      def type
        'text'
      end

      # Seed hash with the node's text value and one { type: } hash per mark.
      def options
        text_value = value
        mark_hashes = marks.map { |mark| { type: mark } }

        { value: text_value, marks: mark_hashes }
      end

      # Mark names for this node; none for plain text.
      def marks
        []
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/text'
4
+
5
module ContentfulConverter
  module Nodes
    # Text node that carries the 'underline' mark.
    class Underline < Text
      # Mark names that Text#options turns into { type: mark } hashes.
      #
      # @return [Array<String>]
      def marks
        ['underline']
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/nodes/base'
4
+
5
module ContentfulConverter
  module Nodes
    # Rich text node whose node type is 'unordered-list'.
    class UnorderedList < Base
      # Always false: this node is never flagged for paragraph wrapping.
      def needs_p_wrapping?
        false
      end

      # Node type string stored in #node_type by Base#initialize.
      private def type
        'unordered-list'
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+
5
module ContentfulConverter
  # Parses an HTML string into a Nokogiri fragment prepared for conversion.
  class NokogiriBuilder
    # Start of an opening or closing <div>/<section> tag. The lookahead keeps
    # the match to whole tag names, so e.g. <divider> is left alone.
    BLOCK_TAG_START = %r{<(/?)(?:div|section)(?=[\s/>])}i.freeze
    private_constant :BLOCK_TAG_START

    class << self
      # Sanitizes +html+, parses it, and wraps every <li> that is not already
      # inside a <ul>/<ol> in a <ul>.
      #
      # @param html [String] raw HTML
      # @return [Nokogiri::HTML::DocumentFragment]
      def build(html)
        normalize_lists(Nokogiri::HTML.fragment(sanitize(html)))
      end

      private

      # Returns a copy of +html+ with <div> and <section> tags renamed to <p>.
      #
      # Only the tag name is rewritten, so tags with attributes are converted
      # as well. Replacing the literal 'div>' rewrote every closing tag but
      # skipped opening tags that had attributes, leaving mismatched
      # <div ...>...</p> markup.
      def sanitize(html)
        html.gsub(BLOCK_TAG_START, '<\1p')
      end

      # Wraps orphan <li> nodes and returns the same fragment.
      def normalize_lists(nokogiri_fragment)
        find_li(nokogiri_fragment).each { |li_node| wrap_parents_in_ul(li_node) }

        nokogiri_fragment
      end

      def find_li(nokogiri_fragment)
        nokogiri_fragment.css('li')
      end

      # Wraps +node+ in its own <ul> unless its parent is already a list.
      def wrap_parents_in_ul(node)
        return if %w[ul ol].include?(node.parent.name)

        node.wrap('<ul>')
      end
    end
  end
end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/node_builder'
4
+
5
module ContentfulConverter
  # Mirrors a Nokogiri tree as a tree of rich text nodes.
  class TreeCloner
    class << self
      # Builds the rich text tree for +nokogiri_fragment+ and returns its
      # hash form (the root node's #to_h).
      def nokogiri_to_rich_text(nokogiri_fragment)
        rich_root =
          if nokogiri_fragment.children.empty?
            NodeBuilder.build(nokogiri_fragment)
          else
            traverse_and_clone(nokogiri_fragment)
          end

        rich_root.to_h
      end

      private

      # Walks the source tree with an explicit stack of
      # [source node, rich node] pairs. For every source child a rich node is
      # built with the current rich node as parent and attached to it,
      # wrapped in a paragraph when the child asks for it.
      #
      # @return [Object] the rich text root node
      def traverse_and_clone(nokogiri_fragment)
        rich_root = NodeBuilder.build(nokogiri_fragment)
        pending = [[nokogiri_fragment, rich_root]]

        until pending.empty?
          source_node, rich_parent = pending.pop
          source_children = source_node.children
          next unless source_children.any?

          source_children.each do |source_child|
            rich_child = NodeBuilder.build(source_child, rich_parent)

            pending << [source_child, rich_child]
            rich_parent.add_content(wrap_in_paragraph(rich_child))
          end
        end

        rich_root
      end

      # Returns +node+ itself, or a new Paragraph (sharing the node's parent)
      # that contains it when node.needs_p_wrapping? is true.
      def wrap_in_paragraph(node)
        return node unless node.needs_p_wrapping?

        Nodes::Paragraph.new(nil, node.parent).tap do |paragraph|
          paragraph.add_content(node)
        end
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module ContentfulConverter
  # Version string of the gem.
  VERSION = '0.0.1.12'
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'contentful_converter/converter'
4
+
5
module ContentfulConverter
  class << self
    # Module-level shortcut: forwards +html+ to Converter.convert and returns
    # its result.
    def convert(html)
      Converter.convert(html)
    end
  end
end
metadata ADDED
@@ -0,0 +1,92 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: contentful_converter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1.12
5
+ platform: ruby
6
+ authors:
7
+ - Alex Avlonitis
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-12-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.9'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.9'
41
+ description: Converts HTML text to Rich Text Contentful specific JSON structure
42
+ email:
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files: []
46
+ files:
47
+ - README.md
48
+ - lib/contentful_converter.rb
49
+ - lib/contentful_converter/converter.rb
50
+ - lib/contentful_converter/node_builder.rb
51
+ - lib/contentful_converter/nodes/base.rb
52
+ - lib/contentful_converter/nodes/blockquote.rb
53
+ - lib/contentful_converter/nodes/code.rb
54
+ - lib/contentful_converter/nodes/document.rb
55
+ - lib/contentful_converter/nodes/header.rb
56
+ - lib/contentful_converter/nodes/horizontal_line.rb
57
+ - lib/contentful_converter/nodes/hyperlink.rb
58
+ - lib/contentful_converter/nodes/italic.rb
59
+ - lib/contentful_converter/nodes/list_item.rb
60
+ - lib/contentful_converter/nodes/ordered_list.rb
61
+ - lib/contentful_converter/nodes/paragraph.rb
62
+ - lib/contentful_converter/nodes/strong.rb
63
+ - lib/contentful_converter/nodes/text.rb
64
+ - lib/contentful_converter/nodes/underline.rb
65
+ - lib/contentful_converter/nodes/unordered_list.rb
66
+ - lib/contentful_converter/nokogiri_builder.rb
67
+ - lib/contentful_converter/tree_cloner.rb
68
+ - lib/contentful_converter/version.rb
69
+ homepage: https://github.com/AlexAvlonitis/contentful_converter
70
+ licenses:
71
+ - MIT
72
+ metadata: {}
73
+ post_install_message:
74
+ rdoc_options: []
75
+ require_paths:
76
+ - lib
77
+ required_ruby_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ required_rubygems_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ requirements: []
88
+ rubygems_version: 3.0.3
89
+ signing_key:
90
+ specification_version: 4
91
+ summary: Contentful HTML to Rich Text Converter
92
+ test_files: []