draftjs_html 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b708c9dfd4a3d4b6cdf22de377edd9a271e6eb527c70648d11337c1d090613e5
4
- data.tar.gz: e472de0d6b79d5b7cf7380d6ed89f8c85aedf79d30d329a42550b1cf94eb7dd7
3
+ metadata.gz: 7195ba4d0f0e659dfd6cb335ee461ca122d3db48f379e8c88c01000fbedefccb
4
+ data.tar.gz: 0ee5ec82ddcac4ae7acb99399a7fd6d52054d21f23abcb2a2ff61cca6bc2c9b7
5
5
  SHA512:
6
- metadata.gz: 6511068eab5eb7c99f7815c85bb6b40bd998f80cee7ed8b41a418d9295bf467f4ed4a1de47049342aa27ec02e0e29207031f1f3cfa9e853da13acc79d98e21b1
7
- data.tar.gz: bd939bb048b11692d520cfef3202b7e9d7106f36cac901cd4171f82991ccf24981b8cc610f0e61626231c463c84202d5dbb80b26221cecf1f68b9f48b435b32a
6
+ metadata.gz: 9ee9d2f946504b1c79b936d215edda2a17ec94970c356e2b59be224da471ace3c0389af24418a7c9d93b0d343b3d6681e4a8638e8bf83b7e776257f8f5babb2e
7
+ data.tar.gz: ebb33f7bfa4fd9a0ca6de27c4bd6a5ff8b24b4d0f480706edff7dbf1065ac89b28f4f6f075f06d9dcc03825c33e6e07df36166c4f1d333e5316b68a9b80454a1
data/Gemfile CHANGED
@@ -6,3 +6,5 @@ source "https://rubygems.org"
6
6
  gemspec
7
7
 
8
8
  gem "rake", "~> 13.0"
9
+ gem "pry-byebug", "~> 3.10"
10
+ gem 'simplecov', '~> 0.21'
data/README.md CHANGED
@@ -177,6 +177,44 @@ end
177
177
  # will nest HTML nodes as you probably want (e.g. "<p>hi!</p>")
178
178
  ```
179
179
 
180
+ ### FromHtml (beta)
181
+
182
+ As an experiment, this gem provides the ability to convert from HTML to raw
183
+ DraftJS JSON. You can explore this behavior with the following snippet:
184
+
185
+ ```ruby
186
+ DraftjsHtml.from_html("<p>Hello!</p>") # => { "blocks" => [{ "text" => "Hello!", "type" => "unstyled" } ] }
187
+ ```
188
+
189
+ There are some known limitations with this approach, but, if you're just trying
190
+ to get started, it may be good enough for you. Contributions and issue reports
191
+ are welcome and encouraged.
192
+
193
+ #### `:node_to_entity`
194
+
195
+ This `FromHtml` option allows the user to specify how a particular node is
196
+ converted to a DraftJS entity. By default, the library converts `img` and `a`
197
+ tags to `IMAGE` and `LINK` entities, respectively. If you specify this option,
198
+ you override the existing behavior and must define those conversions yourself.
199
+
200
+ The option expects a `callable` (`proc`, `lambda`, etc.) that receives three arguments:
201
+
202
+ - tagname (e.g. `a`) - always downcased
203
+ - content - the text content inside the tag
204
+ - HTML attributes - any HTML attributes on the tag as a Hash (string keys)
205
+
206
+ The callable should return a Hash with symbol keys. The supported keys are:
207
+
208
+ - `type` (required)
209
+ - the entity "type" or name
210
+ - `mutability` (optional, default `'IMMUTABLE'`)
211
+ - one of `'MUTABLE'`, `'IMMUTABLE'`, or `'SEGMENTED'`
212
+ - `atomic` (optional, default `false`)
213
+ - when true, creates a new "atomic" block for this entity rather than applying
214
+ the entity to the current range
215
+ - `data` (optional, default `{}`)
216
+ - an arbitrary data-bag (Hash) of entity data
217
+
180
218
  ## Development
181
219
 
182
220
  After checking out the repo, run `bin/setup` to install dependencies. Then, run
@@ -14,8 +14,8 @@ module DraftjsHtml
14
14
  @entity_map = {}
15
15
  end
16
16
 
17
- def text_block(text)
18
- typed_block('unstyled', text)
17
+ def text_block(text, depth: 0)
18
+ typed_block('unstyled', text, depth: depth)
19
19
  end
20
20
 
21
21
  def typed_block(type, text, depth: 0)
@@ -40,6 +40,10 @@ module DraftjsHtml
40
40
  entity_range(key, range)
41
41
  end
42
42
 
43
+ def has_blocks?
44
+ @blocks.any?
45
+ end
46
+
43
47
  def to_h
44
48
  {
45
49
  'blocks' => @blocks,
@@ -47,6 +51,21 @@ module DraftjsHtml
47
51
  }
48
52
  end
49
53
 
54
+ def to_s
55
+ draftjs = DraftjsHtml::Draftjs.parse(to_h)
56
+ draftjs.blocks.reduce('') do |acc, block|
57
+ acc << "typed_block '#{block.type}', '#{block.text}', depth: #{block.depth}\n"
58
+ block.inline_styles.each do |style|
59
+ acc << "inline_style '#{style.name}', #{style.range.begin}..#{style.range.end} # (#{style.offset} + #{style.length})\n"
60
+ end
61
+ block.entity_ranges.each do |entity_range|
62
+ entity = draftjs.entity_map[entity_range.name]
63
+ acc << "apply_entity '#{entity.type}', #{entity_range.range.begin}..#{entity_range.range.end}\n"
64
+ end
65
+ acc
66
+ end
67
+ end
68
+
50
69
  private
51
70
 
52
71
  def deep_stringify_keys(object)
@@ -14,6 +14,7 @@ module DraftjsHtml
14
14
 
15
15
  def convert_block(block)
16
16
  {
17
+ 'key' => block.key,
17
18
  'text' => block.text,
18
19
  'type' => block.type,
19
20
  'depth' => block.depth,
@@ -0,0 +1,104 @@
1
+ module DraftjsHtml
2
+ class FromHtml < Nokogiri::XML::SAX::Document
3
+ class DepthStack
4
+ def initialize
5
+ @stack = []
6
+ @nodes = []
7
+ @list_depth = -1
8
+ @style_stack = StyleStack.new
9
+ end
10
+
11
+ def push(tagname, attrs)
12
+ @stack << PendingBlock.from_tag(tagname, attrs, @nodes.dup, @list_depth)
13
+ track_block_node(tagname)
14
+ end
15
+
16
+ def push_parent(tagname, attrs)
17
+ @list_depth += 1
18
+ track_block_node(tagname)
19
+ end
20
+
21
+ def pop_parent(tagname, draftjs)
22
+ @nodes.pop
23
+ blocks = []
24
+ while current.depth >= 0
25
+ blocks << @stack.pop
26
+ @nodes.pop
27
+ end
28
+ blocks.reverse_each do |pending_block|
29
+ pending_block.flush_to(draftjs, @style_stack)
30
+ pending_block.apply_entities_to(draftjs)
31
+ end
32
+ @list_depth -= 1
33
+ end
34
+
35
+ def pop(draftjs)
36
+ return if @stack.empty?
37
+ return if inside_parent?
38
+
39
+ if @nodes.last == current.tagname && current.flushable?
40
+ flush_to(draftjs)
41
+ elsif @stack[-2]
42
+ @stack[-2].consume(current)
43
+ end
44
+
45
+ @stack.pop
46
+ @nodes.pop
47
+ end
48
+
49
+ def create_pending_entity(tagname, attrs)
50
+ current.pending_entities << { tagname: tagname, start: current_character_offset + 1, attrs: attrs }
51
+ end
52
+
53
+ def convert_pending_entities(conversion)
54
+ while current.pending_entities.any?
55
+ pending_entity = current.pending_entities.pop
56
+ range = pending_entity[:start]..current_character_offset
57
+ user_created_entity = conversion.call(pending_entity[:tagname], current_text_buffer[range], pending_entity[:attrs])
58
+ next unless user_created_entity
59
+
60
+ current.entities << user_created_entity.merge(start: range.begin, finish: range.end)
61
+ end
62
+ end
63
+
64
+ def style_start(tagname)
65
+ @style_stack.track_start(tagname, current_character_offset + 1)
66
+ end
67
+
68
+ def style_end(tagname)
69
+ @style_stack.track_end(tagname, current_character_offset)
70
+ end
71
+
72
+ def flush_to(draftjs)
73
+ current.flush_to(draftjs, @style_stack)
74
+ current.apply_entities_to(draftjs)
75
+ end
76
+
77
+ def append_text(chars)
78
+ current.text_buffer << chars unless chars.empty?
79
+ end
80
+
81
+ private
82
+
83
+ def current_text_buffer
84
+ current.text_buffer.join
85
+ end
86
+
87
+ def current_character_offset
88
+ current.character_offset
89
+ end
90
+
91
+ def track_block_node(name)
92
+ @nodes << name
93
+ end
94
+
95
+ def inside_parent?
96
+ (FromHtml::LIST_PARENT_ELEMENTS & @nodes).any?
97
+ end
98
+
99
+ def current
100
+ @stack.last
101
+ end
102
+ end
103
+ end
104
+ end
@@ -0,0 +1,8 @@
1
+ module DraftjsHtml
2
+ class FromHtml < Nokogiri::XML::SAX::Document
3
+ INLINE_STYLE_ELEMENTS = HtmlDefaults::HTML_STYLE_TAGS_TO_STYLE.keys.freeze
4
+ LIST_PARENT_ELEMENTS = %w[ol ul table].freeze
5
+ INLINE_NON_STYLE_ELEMENTS = %w[a abbr cite font img output q samp span thead tbody td time var].freeze
6
+ BLOCK_CONTENT_ELEMENTS = %w[p dl h1 h2 h3 h4 h5 h6].freeze
7
+ end
8
+ end
@@ -0,0 +1,85 @@
1
+ module DraftjsHtml
2
+ class FromHtml < Nokogiri::XML::SAX::Document
3
+ PendingBlock = Struct.new(:tagname, :attrs, :chars, :entities, :pending_entities, :parent_tagnames, :depth, keyword_init: true) do
4
+ def self.from_tag(name, attrs, parent_tagnames, depth)
5
+ self.new(
6
+ tagname: name,
7
+ attrs: attrs,
8
+ entities: [],
9
+ chars: [],
10
+ pending_entities: [],
11
+ depth: depth,
12
+ parent_tagnames: parent_tagnames,
13
+ )
14
+ end
15
+
16
+ def text_buffer
17
+ self[:chars]
18
+ end
19
+
20
+ def clear_text_buffer
21
+ self[:chars] = []
22
+ end
23
+
24
+ def character_offset
25
+ text_buffer.join.length - 1
26
+ end
27
+
28
+ def flushable?
29
+ %w[OPENING ol ul li table].include?(parent_tagnames.last) ||
30
+ (parent_tagnames.last == 'div' && tagname != 'div')
31
+ end
32
+
33
+ def consume(other_pending_block)
34
+ self.text_buffer += other_pending_block.text_buffer
35
+ self.pending_entities += other_pending_block.pending_entities
36
+ self.entities += other_pending_block.entities
37
+ end
38
+
39
+ def flush_to(draftjs, styles)
40
+ if text_buffer.any?
41
+ chars.join.lines.each do |line|
42
+ draftjs.typed_block(block_name, line.strip, depth: [depth, 0].max)
43
+ end
44
+
45
+ styles.each do |descriptor|
46
+ finish = descriptor[:finish] || character_offset
47
+ draftjs.inline_style(descriptor[:style], descriptor[:start]..finish)
48
+ end
49
+ end
50
+
51
+ clear_text_buffer
52
+ styles.clear_finished
53
+ end
54
+
55
+ def apply_entities_to(draftjs)
56
+ Array(entities).each do |entity|
57
+ range = entity[:start]..entity[:finish]
58
+ if entity[:atomic]
59
+ draftjs.typed_block('atomic', ' ', depth: [depth, 0].max)
60
+ range = 0..1
61
+ elsif range.size < 1
62
+ draftjs.typed_block('atomic', ' ', depth: [depth, 0].max) unless draftjs.has_blocks?
63
+ range = (range.begin..range.end + 1)
64
+ end
65
+
66
+ draftjs.apply_entity entity[:type], range, data: entity[:data], mutability: entity.fetch(:mutability, 'IMMUTABLE')
67
+ end
68
+ end
69
+
70
+ def block_name
71
+ stack = parent_tagnames.last == 'li' ? parent_tagnames.last(2) : parent_tagnames.last(1)
72
+ return 'ordered-list-item' if stack.first == 'ol'
73
+ return 'unordered-list-item' if stack.first == 'ul'
74
+
75
+ DraftjsHtml::HtmlDefaults::BLOCK_TYPE_TO_HTML.invert.fetch(tagname, 'unstyled')
76
+ end
77
+
78
+ private
79
+
80
+ def text_buffer=(other)
81
+ self[:chars] = other
82
+ end
83
+ end
84
+ end
85
+ end
@@ -0,0 +1,50 @@
1
+ module DraftjsHtml
2
+ class FromHtml < Nokogiri::XML::SAX::Document
3
+ class StyleStack
4
+ def initialize
5
+ @stack = []
6
+ end
7
+
8
+ def clear_finished
9
+ @stack.delete_if { !!_1[:finish] }
10
+ end
11
+
12
+ def each(&block)
13
+ @stack.reverse_each.group_by { _1[:tagname] }.each do |_, descriptors|
14
+ overlapping_ranges = find_overlapping_styles(descriptors)
15
+ widest_descriptor = overlapping_ranges.max_by { (_1[:start].._1[:finish]).size }
16
+
17
+ applicable_styles = descriptors - overlapping_ranges + [widest_descriptor].compact
18
+ applicable_styles.each(&block)
19
+ end
20
+ end
21
+
22
+ def track_start(tagname, current_character_offset)
23
+ style = DraftjsHtml::HtmlDefaults::HTML_STYLE_TAGS_TO_STYLE[tagname]
24
+ @stack.unshift({ tagname: tagname, style: style, start: current_character_offset })
25
+ end
26
+
27
+ def track_end(tagname, current_character_offset)
28
+ descriptor_index = @stack.find_index { _1[:tagname] == tagname && !_1[:finish] }
29
+ descriptor = @stack[descriptor_index]
30
+ descriptor[:finish] = current_character_offset
31
+ end
32
+
33
+ private
34
+
35
+ def find_overlapping_styles(descriptors)
36
+ descriptors.select do |candidate_a|
37
+ candidate_range = candidate_a[:start]..candidate_a[:finish]
38
+ (descriptors - [candidate_a]).any? do |other|
39
+ other_range = other[:start]..other[:finish]
40
+ range_overlaps?(candidate_range, other_range)
41
+ end
42
+ end
43
+ end
44
+
45
+ def range_overlaps?(candidate_range, other_range)
46
+ other_range.begin == candidate_range.begin || candidate_range.cover?(other_range.begin) || other_range.cover?(candidate_range.begin)
47
+ end
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,107 @@
1
+ require 'stringio'
2
+ require_relative 'html_defaults'
3
+ require_relative 'from_html/elements'
4
+ require_relative 'from_html/style_stack'
5
+ require_relative 'from_html/pending_block'
6
+ require_relative 'from_html/depth_stack'
7
+
8
+ module DraftjsHtml
9
+ class FromHtml < Nokogiri::XML::SAX::Document
10
+
11
+ def initialize(options = {})
12
+ @draftjs = Draftjs::RawBuilder.new
13
+ @parser = Nokogiri::HTML4::SAX::Parser.new(self)
14
+ @depth_stack = DepthStack.new
15
+ @options = ensure_options!(options.dup)
16
+ end
17
+
18
+ def convert(raw_html)
19
+ convert_io(StringIO.new(raw_html))
20
+ end
21
+
22
+ def convert_io(html_io)
23
+ @parser.parse(html_io)
24
+ @draftjs.to_h
25
+ end
26
+
27
+ def characters(str)
28
+ content = strip_unnecessary_trailing_space(str)
29
+ @depth_stack.append_text(content)
30
+ end
31
+
32
+ def end_element(name)
33
+ track_pending_entity_end(name)
34
+
35
+ case name
36
+ when 'br' then return
37
+ when 'html', 'body', *FromHtml::INLINE_NON_STYLE_ELEMENTS
38
+ when *FromHtml::INLINE_STYLE_ELEMENTS
39
+ track_inline_style_end(name)
40
+ when *FromHtml::LIST_PARENT_ELEMENTS
41
+ @depth_stack.pop_parent(name, @draftjs)
42
+ else
43
+ @depth_stack.pop(@draftjs)
44
+ end
45
+ end
46
+
47
+ def start_element(name, attrs = [])
48
+ attributes = Hash[attrs]
49
+
50
+ case name
51
+ when 'br'
52
+ @depth_stack.append_text("\n")
53
+ when 'html', 'body', *FromHtml::INLINE_NON_STYLE_ELEMENTS
54
+ when *FromHtml::INLINE_STYLE_ELEMENTS
55
+ track_inline_style_start(name)
56
+ when *FromHtml::LIST_PARENT_ELEMENTS
57
+ @depth_stack.push_parent(name, attrs)
58
+ else
59
+ @depth_stack.push(name, attributes)
60
+ end
61
+
62
+ track_pending_entity_start(name, attributes)
63
+ end
64
+
65
+ def start_document
66
+ @depth_stack.push('OPENING', {})
67
+ end
68
+
69
+ def end_document
70
+ @depth_stack.flush_to(@draftjs)
71
+ end
72
+
73
+ private
74
+
75
+ def track_inline_style_start(tagname)
76
+ @depth_stack.style_start(tagname)
77
+ end
78
+
79
+ def track_inline_style_end(tagname)
80
+ @depth_stack.style_end(tagname)
81
+ end
82
+
83
+ def strip_unnecessary_trailing_space(str)
84
+ str
85
+ .gsub(/(\n+[[:space:]]*$)|(^\n+)/, '')
86
+ .gsub(/(^[[:space:]]+$)/, ' ')
87
+ end
88
+
89
+ def track_pending_entity_start(tagname, attrs)
90
+ @depth_stack.create_pending_entity(tagname, attrs)
91
+ end
92
+
93
+ def track_pending_entity_end(name)
94
+ @depth_stack.convert_pending_entities(@options[:node_to_entity])
95
+ end
96
+
97
+ def ensure_options!(opts)
98
+ opts[:node_to_entity] ||= ->(tagname, _content, attrs) {
99
+ case tagname
100
+ when 'a' then { type: 'LINK', mutability: 'MUTABLE', data: attrs }
101
+ when 'img' then { type: 'IMAGE', mutability: 'IMMUTABLE', atomic: true, data: attrs }
102
+ end
103
+ }
104
+ opts
105
+ end
106
+ end
107
+ end
@@ -1,8 +1,8 @@
1
1
  module DraftjsHtml
2
2
  module HtmlDefaults
3
3
  BLOCK_TYPE_TO_HTML = {
4
- 'unstyled' => 'p',
5
4
  'paragraph' => 'p',
5
+ 'unstyled' => 'p',
6
6
  'header-one' => 'h1',
7
7
  'header-two' => 'h2',
8
8
  'header-three' => 'h3',
@@ -23,6 +23,18 @@ module DraftjsHtml
23
23
  'UNDERLINE' => 'u',
24
24
  }.freeze
25
25
 
26
+ HTML_STYLE_TAGS_TO_STYLE = {
27
+ 'b' => 'BOLD',
28
+ 'i' => 'ITALIC',
29
+ 'em' => 'ITALIC',
30
+ 'del' => 'STRIKETHROUGH',
31
+ 'u' => 'UNDERLINE',
32
+ 'strong' => 'BOLD',
33
+ 'small' => 'SMALL',
34
+ 'sub' => 'SUBSCRIPT',
35
+ 'sup' => 'SUPERSCRIPT',
36
+ }.freeze
37
+
26
38
  ENTITY_ATTRIBUTE_NAME_MAP = {
27
39
  'className' => 'class',
28
40
  'url' => 'href',
@@ -47,4 +59,4 @@ module DraftjsHtml
47
59
  }
48
60
  }.freeze
49
61
  end
50
- end
62
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module DraftjsHtml
4
- VERSION = "0.14.0"
4
+ VERSION = "0.15.0"
5
5
  end
data/lib/draftjs_html.rb CHANGED
@@ -4,6 +4,7 @@ require_relative "draftjs_html/version"
4
4
  require 'nokogiri'
5
5
  require_relative 'draftjs_html/draftjs'
6
6
  require_relative 'draftjs_html/to_html'
7
+ require_relative 'draftjs_html/from_html'
7
8
 
8
9
  module DraftjsHtml
9
10
  class Error < StandardError; end
@@ -11,4 +12,8 @@ module DraftjsHtml
11
12
  def self.to_html(raw_draftjs, options: {})
12
13
  ToHtml.new(options).convert(raw_draftjs)
13
14
  end
15
+
16
+ def self.from_html(html_str, options: {})
17
+ FromHtml.new(options).convert(html_str)
18
+ end
14
19
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: draftjs_html
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.0
4
+ version: 0.15.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - TJ Taylor
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-10-19 00:00:00.000000000 Z
11
+ date: 2022-11-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -66,6 +66,11 @@ files:
66
66
  - lib/draftjs_html/draftjs/entity_map.rb
67
67
  - lib/draftjs_html/draftjs/raw_builder.rb
68
68
  - lib/draftjs_html/draftjs/to_raw.rb
69
+ - lib/draftjs_html/from_html.rb
70
+ - lib/draftjs_html/from_html/depth_stack.rb
71
+ - lib/draftjs_html/from_html/elements.rb
72
+ - lib/draftjs_html/from_html/pending_block.rb
73
+ - lib/draftjs_html/from_html/style_stack.rb
69
74
  - lib/draftjs_html/html_defaults.rb
70
75
  - lib/draftjs_html/html_depth.rb
71
76
  - lib/draftjs_html/node.rb