draftjs_html 0.14.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b708c9dfd4a3d4b6cdf22de377edd9a271e6eb527c70648d11337c1d090613e5
4
- data.tar.gz: e472de0d6b79d5b7cf7380d6ed89f8c85aedf79d30d329a42550b1cf94eb7dd7
3
+ metadata.gz: 770a91eea2ce6ff069f3ccf80d8d9857e15ea8882cfd7a426fe9b8aea2b6ca4e
4
+ data.tar.gz: 1dfef55358fb964b0a6ac6c98458b1d04a59c0e115c11a2d32a5f4042c747777
5
5
  SHA512:
6
- metadata.gz: 6511068eab5eb7c99f7815c85bb6b40bd998f80cee7ed8b41a418d9295bf467f4ed4a1de47049342aa27ec02e0e29207031f1f3cfa9e853da13acc79d98e21b1
7
- data.tar.gz: bd939bb048b11692d520cfef3202b7e9d7106f36cac901cd4171f82991ccf24981b8cc610f0e61626231c463c84202d5dbb80b26221cecf1f68b9f48b435b32a
6
+ metadata.gz: '0830e5b529717b67fd2dbdd2627c53e68332bbc66601e6b405ecd60d993aef9eca797211bfc442c95c6cf00d9996d65b22cdacd12982094b4d3c33f04e86f0c2'
7
+ data.tar.gz: 47f94740645254e51675033bcb1d23d331eaa054d0d73382d689e4d10e18441177622b8fb9d66d51008c06f3d432eda1809014fd541ae2f52fa1b5d5d5905c9b
data/Gemfile CHANGED
@@ -6,3 +6,5 @@ source "https://rubygems.org"
6
6
  gemspec
7
7
 
8
8
  gem "rake", "~> 13.0"
9
+ gem "pry-byebug", "~> 3.10"
10
+ gem 'simplecov', '~> 0.21'
data/README.md CHANGED
@@ -177,6 +177,44 @@ end
177
177
  # will nest HTML nodes as you probably want (e.g. "<p>hi!</p>")
178
178
  ```
179
179
 
180
+ ### FromHtml (beta)
181
+
182
+ As an experiment, this gem is providing the ability to convert from HTML to raw
183
+ DraftJS JSON. You can explore this behavior with the following snippet:
184
+
185
+ ```ruby
186
+ DraftjsHtml.from_html("<p>Hello!</p>") # => { "blocks" => [{ "text" => "Hello!", "type" => "unstyled" }] }
187
+ ```
188
+
189
+ There are some known limitations with this approach, but, if you're just trying
190
+ to get started, it may be good enough for you. Contributions and issue reports
191
+ are welcome and encouraged.
192
+
193
+ #### `:node_to_entity`
194
+
195
+ This `FromHtml` option allows the user to specify how a particular node is
196
+ converted to a DraftJS entity. By default, the library converts `img` and `a`
197
+ tags to `IMAGE` and `LINK` entities, respectively. If you specify this option,
198
+ you override the existing behavior and must define those conversions yourself.
199
+
200
+ The option expects a `callable` (`proc`, `lambda`, etc.) that receives three arguments:
201
+
202
+ - tagname (e.g. `a`) - always downcased
203
+ - content - the text content inside the tag
204
+ - HTML attributes - any HTML attributes on the tag as a Hash (string keys)
205
+
206
+ The callable should return a Hash with symbol keys. The supported values are:
207
+
208
+ - `type` (required)
209
+ - the entity "type" or name
210
+ - `mutability` (optional, default `'IMMUTABLE'`)
211
+ - either 'MUTABLE', 'IMMUTABLE', or 'SEGMENTED'
212
+ - `atomic` (optional, default `false`)
213
+ - when true, creates a new "atomic" block for this entity rather than applying
214
+ the entity to the current range
215
+ - `data` (optional, default `{}`)
216
+ - an arbitrary data-bag (Hash) of entity data
217
+
180
218
  ## Development
181
219
 
182
220
  After checking out the repo, run `bin/setup` to install dependencies. Then, run
@@ -14,8 +14,8 @@ module DraftjsHtml
14
14
  @entity_map = {}
15
15
  end
16
16
 
17
- def text_block(text)
18
- typed_block('unstyled', text)
17
# Shorthand for `typed_block('unstyled', text, depth: depth)`.
# `depth` defaults to 0 and is forwarded unchanged.
+ def text_block(text, depth: 0)
18
+ typed_block('unstyled', text, depth: depth)
19
19
  end
20
20
 
21
21
  def typed_block(type, text, depth: 0)
@@ -40,6 +40,10 @@ module DraftjsHtml
40
40
  entity_range(key, range)
41
41
  end
42
42
 
43
# Returns the result of `@blocks.any?` (the same collection that #to_h
# exposes under the 'blocks' key).
+ def has_blocks?
44
+ @blocks.any?
45
+ end
46
+
43
47
  def to_h
44
48
  {
45
49
  'blocks' => @blocks,
@@ -47,6 +51,21 @@ module DraftjsHtml
47
51
  }
48
52
  end
49
53
 
54
# Renders the builder's current content as text, one line per block,
# inline style and entity range, formatted like `typed_block`,
# `inline_style` and `apply_entity` calls. The state is round-tripped
# through `DraftjsHtml::Draftjs.parse(to_h)` so the parsed block,
# style and entity-range objects can be used for formatting.
#
# @return [String] the rendered description ('' when there are no blocks)
def to_s
  draftjs = DraftjsHtml::Draftjs.parse(to_h)

  # `+''` gives a mutable buffer even when string literals are frozen, and
  # each_with_object hands the same buffer back without a manual return.
  draftjs.blocks.each_with_object(+'') do |block, acc|
    acc << "typed_block '#{block.type}', '#{block.text}', depth: #{block.depth}\n"
    block.inline_styles.each do |style|
      acc << "inline_style '#{style.name}', #{style.range.begin}..#{style.range.end} # (#{style.offset} + #{style.length})\n"
    end
    block.entity_ranges.each do |entity_range|
      # Entity ranges only carry a key; the entity itself lives in the map.
      entity = draftjs.entity_map[entity_range.name]
      acc << "apply_entity '#{entity.type}', #{entity_range.range.begin}..#{entity_range.range.end}\n"
    end
  end
end
68
+
50
69
  private
51
70
 
52
71
  def deep_stringify_keys(object)
@@ -14,6 +14,7 @@ module DraftjsHtml
14
14
 
15
15
  def convert_block(block)
16
16
  {
17
+ 'key' => block.key,
17
18
  'text' => block.text,
18
19
  'type' => block.type,
19
20
  'depth' => block.depth,
@@ -0,0 +1,109 @@
1
+ module DraftjsHtml
2
+ class FromHtml < Nokogiri::XML::SAX::Document
3
# Tracks the block-level state of an HTML -> DraftJS conversion.
#
# State:
# - @stack:       PendingBlock objects, innermost last (see #current).
# - @nodes:       tag names recorded by #push and #push_parent.
# - @list_depth:  starts at -1, incremented by #push_parent and decremented
#                 by #pop_parent; handed to every new PendingBlock as depth.
# - @style_stack: the StyleStack shared by all blocks when flushing.
class DepthStack
  def initialize
    @stack = []
    @nodes = []
    @list_depth = -1
    @style_stack = StyleStack.new
  end

  # Opens a new PendingBlock for `tagname`, snapshotting the currently
  # recorded node names as its parents, then records the tag itself.
  def push(tagname, attrs)
    @stack << PendingBlock.from_tag(tagname, attrs, @nodes.dup, @list_depth)
    track_block_node(tagname)
  end

  # Enters a list-parent element: bumps the list depth and records the tag.
  # `attrs` is accepted for signature symmetry with #push but is not used.
  def push_parent(tagname, attrs)
    @list_depth += 1
    track_block_node(tagname)
  end

  # Leaves a list-parent element: removes every pending block whose depth is
  # >= 0 from the stack, flushes them to `draftjs` in the order they were
  # pushed, and decrements the list depth. `tagname` is not used.
  def pop_parent(tagname, draftjs)
    @nodes.pop
    blocks = []
    # `current` is nil once the stack is drained; without the nil check this
    # loop raised NoMethodError when no negative-depth block sat underneath.
    while current && current.depth >= 0
      blocks << @stack.pop
      @nodes.pop
    end
    blocks.reverse_each do |pending_block|
      pending_block.flush_to(draftjs, @style_stack)
      pending_block.apply_entities_to(draftjs)
    end
    @list_depth -= 1
  end

  # Closes the current block. Does nothing when the stack is empty or while
  # any list-parent tag is recorded in @nodes (those blocks are handled by
  # #pop_parent). Otherwise the block is either flushed to `draftjs` or its
  # content is merged into the block below it, and then it is removed.
  def pop(draftjs)
    return if @stack.empty?
    return if inside_parent?

    if @nodes.last == current.tagname && current.flushable?
      flush_to(draftjs)
    elsif @stack[-2]
      @stack[-2].consume(current)
    end

    @stack.pop
    @nodes.pop
  end

  # Remembers that an element started at the next character position of the
  # current block, so it can later be offered to the entity conversion.
  def create_pending_entity(tagname, attrs)
    current.pending_entities << { tagname: tagname, start: current_character_offset + 1, attrs: attrs }
  end

  # Drains the current block's pending entities. Each one is passed to
  # `conversion` as (tagname, content, attrs); a falsy result discards it.
  # A kept entity is stored on the block with its start/finish offsets.
  def convert_pending_entities(conversion)
    while current.pending_entities.any?
      pending_entity = current.pending_entities.pop
      range = pending_entity[:start]..current_character_offset
      content = current_text_buffer[range]
      user_created_entity = conversion.call(pending_entity[:tagname], content, pending_entity[:attrs])
      next unless user_created_entity

      # A non-atomic entity with no text gets a single space appended to the
      # buffer, and its range is widened by one position.
      if content == '' && !user_created_entity[:atomic]
        current.text_buffer << ' '
        range = range.begin..(range.end+1)
      end
      current.entities << user_created_entity.merge(start: range.begin, finish: range.end)
    end
  end

  # Records the start of an inline style at the next character position.
  def style_start(tagname)
    @style_stack.track_start(tagname, current_character_offset + 1)
  end

  # Records the end of an inline style at the current character position.
  def style_end(tagname)
    @style_stack.track_end(tagname, current_character_offset)
  end

  # Flushes the current block's text/styles and then its entities.
  def flush_to(draftjs)
    current.flush_to(draftjs, @style_stack)
    current.apply_entities_to(draftjs)
  end

  # Appends non-empty text to the current block's buffer.
  def append_text(chars)
    current.text_buffer << chars unless chars.empty?
  end

  private

  def current_text_buffer
    current.text_buffer.join
  end

  def current_character_offset
    current.character_offset
  end

  def track_block_node(name)
    @nodes << name
  end

  # True while any of FromHtml::LIST_PARENT_ELEMENTS is recorded in @nodes.
  def inside_parent?
    (FromHtml::LIST_PARENT_ELEMENTS & @nodes).any?
  end

  # The innermost pending block, or nil when the stack is empty.
  def current
    @stack.last
  end
end
108
+ end
109
+ end
@@ -0,0 +1,8 @@
1
+ module DraftjsHtml
2
+ class FromHtml < Nokogiri::XML::SAX::Document
3
# Tag names that have an inline-style mapping (the keys of
# HtmlDefaults::HTML_STYLE_TAGS_TO_STYLE).
+ INLINE_STYLE_ELEMENTS = HtmlDefaults::HTML_STYLE_TAGS_TO_STYLE.keys.freeze
4
# Container tags whose children are collected and flushed together when the
# container closes.
+ LIST_PARENT_ELEMENTS = %w[ol ul table].freeze
5
# Inline tags that neither open a block nor map to an inline style.
+ INLINE_NON_STYLE_ELEMENTS = %w[a abbr cite font img output q samp span thead tbody td time var].freeze
6
# NOTE(review): presumably the tags treated as block content; confirm where
# this constant is consumed.
+ BLOCK_CONTENT_ELEMENTS = %w[p dl h1 h2 h3 h4 h5 h6].freeze
7
+ end
8
+ end
@@ -0,0 +1,82 @@
1
+ module DraftjsHtml
2
+ class FromHtml < Nokogiri::XML::SAX::Document
3
# Accumulates everything known about one block-level element until it is
# written to the DraftJS builder.
#
# Members:
# - tagname / attrs:   the element that opened the block
# - chars:             text fragments collected so far (see #text_buffer)
# - entities:          converted entities with :start/:finish offsets
# - pending_entities:  elements seen but not yet converted to entities
# - parent_tagnames:   tag names that were open when this block was created
# - depth:             list depth at creation time (negative outside lists)
PendingBlock = Struct.new(:tagname, :attrs, :chars, :entities, :pending_entities, :parent_tagnames, :depth, keyword_init: true) do
  # Builds an empty block for the given tag.
  def self.from_tag(name, attrs, parent_tagnames, depth)
    new(
      tagname: name,
      attrs: attrs,
      entities: [],
      chars: [],
      pending_entities: [],
      depth: depth,
      parent_tagnames: parent_tagnames,
    )
  end

  # The array of collected text fragments.
  def text_buffer
    self[:chars]
  end

  def clear_text_buffer
    self[:chars] = []
  end

  # Index of the last collected character; -1 while the buffer is empty.
  # Summing fragment lengths avoids building the joined string just to
  # measure it.
  def character_offset
    text_buffer.sum(&:length) - 1
  end

  # Whether this block is written out as its own DraftJS block when it
  # closes: true when its immediate parent is the document sentinel, a list
  # container or item, or a `div` (unless this block is itself a `div`).
  def flushable?
    %w[OPENING ol ul li table].include?(parent_tagnames.last) ||
      (parent_tagnames.last == 'div' && tagname != 'div')
  end

  # Absorbs another block's text, pending entities and entities.
  def consume(other_pending_block)
    self.text_buffer += other_pending_block.text_buffer
    self.pending_entities += other_pending_block.pending_entities
    self.entities += other_pending_block.entities
  end

  # Writes the buffered text to `draftjs`, one typed block per line of text,
  # then applies every style descriptor yielded by `styles`. The buffer is
  # emptied afterwards and finished styles are dropped.
  def flush_to(draftjs, styles)
    if text_buffer.any?
      text_buffer.join.lines.each do |line|
        # depth is negative outside lists; DraftJS depth is clamped to >= 0.
        draftjs.typed_block(block_name, line.chomp, depth: [depth, 0].max)
      end

      styles.each do |descriptor|
        # A style that is still open runs to the end of the buffered text.
        finish = descriptor[:finish] || character_offset
        draftjs.inline_style(descriptor[:style], descriptor[:start]..finish)
      end
    end

    clear_text_buffer
    styles.clear_finished
  end

  # Applies every collected entity to `draftjs`. Atomic entities first get
  # their own 'atomic' block containing a single space and cover range 0..0.
  def apply_entities_to(draftjs)
    Array(entities).each do |entity|
      range = entity[:start]..entity[:finish]
      if entity[:atomic]
        draftjs.typed_block('atomic', ' ', depth: [depth, 0].max)
        range = 0..0
      end

      # `data` is optional in the entity hash; default to an empty Hash
      # instead of forwarding nil.
      draftjs.apply_entity entity[:type], range, data: entity[:data] || {}, mutability: entity.fetch(:mutability, 'IMMUTABLE')
    end
  end

  # DraftJS block type for this block: a list-item type when the nearest
  # relevant parent is `ol`/`ul`, otherwise the type mapped to the tag in
  # HtmlDefaults::BLOCK_TYPE_TO_HTML ('unstyled' when unmapped).
  def block_name
    stack = parent_tagnames.last == 'li' ? parent_tagnames.last(2) : parent_tagnames.last(1)
    return 'ordered-list-item' if stack.first == 'ol'
    return 'unordered-list-item' if stack.first == 'ul'

    DraftjsHtml::HtmlDefaults::BLOCK_TYPE_TO_HTML.invert.fetch(tagname, 'unstyled')
  end

  private

  def text_buffer=(other)
    self[:chars] = other
  end
end
81
+ end
82
+ end
@@ -0,0 +1,50 @@
1
+ module DraftjsHtml
2
+ class FromHtml < Nokogiri::XML::SAX::Document
3
# Records where inline styles start and end within the text being collected.
#
# Each descriptor is a Hash with :tagname, :style, :start and, once the
# closing tag was seen, :finish. New descriptors are added at the front.
class StyleStack
  def initialize
    @stack = []
  end

  # Drops every descriptor that already has a :finish offset.
  def clear_finished
    @stack.delete_if { _1[:finish] }
  end

  # Yields the descriptors that should be applied. Descriptors are grouped
  # by tag name; within a group, descriptors whose ranges overlap are
  # collapsed into the single widest one, the rest are yielded as-is.
  def each(&block)
    @stack.reverse_each.group_by { _1[:tagname] }.each do |_, descriptors|
      overlapping_ranges = find_overlapping_styles(descriptors)
      widest_descriptor = overlapping_ranges.max_by { (_1[:start].._1[:finish]).size }

      applicable_styles = descriptors - overlapping_ranges + [widest_descriptor].compact
      applicable_styles.each(&block)
    end
  end

  # Opens a descriptor for `tagname` starting at the given offset. The style
  # name is looked up in HtmlDefaults::HTML_STYLE_TAGS_TO_STYLE.
  def track_start(tagname, current_character_offset)
    style = DraftjsHtml::HtmlDefaults::HTML_STYLE_TAGS_TO_STYLE[tagname]
    @stack.unshift({ tagname: tagname, style: style, start: current_character_offset })
  end

  # Closes the most recently opened, still-open descriptor for `tagname`.
  # An end with no matching open descriptor is ignored (it used to raise
  # TypeError by indexing the stack with nil).
  def track_end(tagname, current_character_offset)
    descriptor = @stack.find { _1[:tagname] == tagname && !_1[:finish] }
    return unless descriptor

    descriptor[:finish] = current_character_offset
  end

  private

  # Descriptors whose range overlaps at least one other descriptor's range.
  def find_overlapping_styles(descriptors)
    descriptors.select do |candidate_a|
      candidate_range = candidate_a[:start]..candidate_a[:finish]
      (descriptors - [candidate_a]).any? do |other|
        other_range = other[:start]..other[:finish]
        range_overlaps?(candidate_range, other_range)
      end
    end
  end

  # The explicit begin == begin check keeps ranges with begin > end
  # (for which cover? is always false) comparable.
  def range_overlaps?(candidate_range, other_range)
    other_range.begin == candidate_range.begin || candidate_range.cover?(other_range.begin) || other_range.cover?(candidate_range.begin)
  end
end
49
+ end
50
+ end
@@ -0,0 +1,107 @@
1
+ require 'stringio'
2
+ require_relative 'html_defaults'
3
+ require_relative 'from_html/elements'
4
+ require_relative 'from_html/style_stack'
5
+ require_relative 'from_html/pending_block'
6
+ require_relative 'from_html/depth_stack'
7
+
8
+ module DraftjsHtml
9
# SAX handler that converts HTML into a raw DraftJS Hash.
#
# The instance registers itself as the document handler of a Nokogiri HTML4
# SAX parser. Parser callbacks (#start_element, #characters, #end_element,
# ...) feed a DepthStack, which writes finished blocks to a
# Draftjs::RawBuilder.
#
# Options:
# - :node_to_entity - callable receiving (tagname, content, attrs) and
#   returning an entity Hash or nil. Defaults to converting `a` to a
#   mutable LINK and `img` to an immutable, atomic IMAGE.
class FromHtml < Nokogiri::XML::SAX::Document
  def initialize(options = {})
    @draftjs = Draftjs::RawBuilder.new
    @parser = Nokogiri::HTML4::SAX::Parser.new(self)
    @depth_stack = DepthStack.new
    # dup so the caller's Hash is not mutated when defaults are filled in.
    @options = ensure_options!(options.dup)
  end

  # Converts an HTML String; returns the builder's `to_h` result.
  def convert(raw_html)
    convert_io(StringIO.new(raw_html))
  end

  # Converts HTML read from an IO-like object; returns the builder's `to_h`.
  def convert_io(html_io)
    @parser.parse(html_io)
    @draftjs.to_h
  end

  # SAX callback: text content.
  def characters(str)
    content = strip_unnecessary_trailing_space(str)
    @depth_stack.append_text(content)
  end

  # SAX callback: closing tag. Pending entities are converted first, then
  # the tag is dispatched by category.
  def end_element(name)
    track_pending_entity_end(name)

    case name
    when 'br' then return
    when 'html', 'body', *FromHtml::INLINE_NON_STYLE_ELEMENTS
      # no block or style bookkeeping for these tags
    when *FromHtml::INLINE_STYLE_ELEMENTS
      track_inline_style_end(name)
    when *FromHtml::LIST_PARENT_ELEMENTS
      @depth_stack.pop_parent(name, @draftjs)
    else
      @depth_stack.pop(@draftjs)
    end
  end

  # SAX callback: opening tag. `attrs` arrives as an array of [name, value]
  # pairs and is converted to a Hash once, so every collaborator receives
  # the same shape.
  def start_element(name, attrs = [])
    attributes = attrs.to_h

    case name
    when 'br'
      @depth_stack.append_text("\n")
    when 'html', 'body', *FromHtml::INLINE_NON_STYLE_ELEMENTS
      # no block or style bookkeeping for these tags
    when *FromHtml::INLINE_STYLE_ELEMENTS
      track_inline_style_start(name)
    when *FromHtml::LIST_PARENT_ELEMENTS
      @depth_stack.push_parent(name, attributes)
    else
      @depth_stack.push(name, attributes)
    end

    track_pending_entity_start(name, attributes)
  end

  # SAX callback: pushes a sentinel block named 'OPENING' so that there is
  # always a current block to collect text into.
  def start_document
    @depth_stack.push('OPENING', {})
  end

  # SAX callback: flushes whatever is still buffered in the current block.
  def end_document
    @depth_stack.flush_to(@draftjs)
  end

  private

  def track_inline_style_start(tagname)
    @depth_stack.style_start(tagname)
  end

  def track_inline_style_end(tagname)
    @depth_stack.style_end(tagname)
  end

  # Removes newline runs at the start of a line and newline runs followed
  # by trailing whitespace at the end of a line, then collapses any line
  # that consists only of whitespace into a single space.
  def strip_unnecessary_trailing_space(str)
    str
      .gsub(/(\n+[[:space:]]*$)|(^\n+)/, '')
      .gsub(/(^[[:space:]]+$)/, ' ')
  end

  def track_pending_entity_start(tagname, attrs)
    @depth_stack.create_pending_entity(tagname, attrs)
  end

  # `name` is not used: every pending entity of the current block is
  # converted, whichever tag just closed.
  def track_pending_entity_end(name)
    @depth_stack.convert_pending_entities(@options[:node_to_entity])
  end

  # Fills in the default :node_to_entity conversion (mutates and returns
  # `opts`). The default lambda returns nil for every tag but `a` and `img`.
  def ensure_options!(opts)
    opts[:node_to_entity] ||= ->(tagname, _content, attrs) {
      case tagname
      when 'a' then { type: 'LINK', mutability: 'MUTABLE', data: attrs }
      when 'img' then { type: 'IMAGE', mutability: 'IMMUTABLE', atomic: true, data: attrs }
      end
    }
    opts
  end
end
107
+ end
@@ -1,8 +1,8 @@
1
1
  module DraftjsHtml
2
2
  module HtmlDefaults
3
3
  BLOCK_TYPE_TO_HTML = {
4
- 'unstyled' => 'p',
5
4
  'paragraph' => 'p',
5
+ 'unstyled' => 'p',
6
6
  'header-one' => 'h1',
7
7
  'header-two' => 'h2',
8
8
  'header-three' => 'h3',
@@ -23,6 +23,18 @@ module DraftjsHtml
23
23
  'UNDERLINE' => 'u',
24
24
  }.freeze
25
25
 
26
# Maps an HTML tag name to an inline-style name. Several tags may share a
# style (`b`/`strong` -> BOLD, `i`/`em` -> ITALIC).
+ HTML_STYLE_TAGS_TO_STYLE = {
27
+ 'b' => 'BOLD',
28
+ 'i' => 'ITALIC',
29
+ 'em' => 'ITALIC',
30
+ 'del' => 'STRIKETHROUGH',
31
+ 'u' => 'UNDERLINE',
32
+ 'strong' => 'BOLD',
33
+ 'small' => 'SMALL',
34
+ 'sub' => 'SUBSCRIPT',
35
+ 'sup' => 'SUPERSCRIPT',
36
+ }.freeze
37
+
26
38
  ENTITY_ATTRIBUTE_NAME_MAP = {
27
39
  'className' => 'class',
28
40
  'url' => 'href',
@@ -32,7 +44,7 @@ module DraftjsHtml
32
44
 
33
45
  ENTITY_CONVERSION_MAP = {
34
46
  'LINK' => ->(entity, content, *) {
35
- attributes = entity.data.slice('url', 'rel', 'target', 'title', 'className').each_with_object({}) do |(attr, value), h|
47
+ attributes = entity.data.slice('url', 'href', 'rel', 'target', 'title', 'className').each_with_object({}) do |(attr, value), h|
36
48
  h[ENTITY_ATTRIBUTE_NAME_MAP.fetch(attr, attr)] = value
37
49
  end
38
50
 
@@ -47,4 +59,4 @@ module DraftjsHtml
47
59
  }
48
60
  }.freeze
49
61
  end
50
- end
62
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module DraftjsHtml
4
- VERSION = "0.14.0"
4
+ VERSION = "0.16.0"
5
5
  end
data/lib/draftjs_html.rb CHANGED
@@ -4,6 +4,7 @@ require_relative "draftjs_html/version"
4
4
  require 'nokogiri'
5
5
  require_relative 'draftjs_html/draftjs'
6
6
  require_relative 'draftjs_html/to_html'
7
+ require_relative 'draftjs_html/from_html'
7
8
 
8
9
  module DraftjsHtml
9
10
  class Error < StandardError; end
@@ -11,4 +12,8 @@ module DraftjsHtml
11
12
  def self.to_html(raw_draftjs, options: {})
12
13
  ToHtml.new(options).convert(raw_draftjs)
13
14
  end
15
+
16
# Builds a FromHtml with `options` and returns the result of calling
# `convert` on it with `html_str`.
+ def self.from_html(html_str, options: {})
17
+ FromHtml.new(options).convert(html_str)
18
+ end
14
19
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: draftjs_html
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.0
4
+ version: 0.16.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - TJ Taylor
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-10-19 00:00:00.000000000 Z
11
+ date: 2022-11-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -66,6 +66,11 @@ files:
66
66
  - lib/draftjs_html/draftjs/entity_map.rb
67
67
  - lib/draftjs_html/draftjs/raw_builder.rb
68
68
  - lib/draftjs_html/draftjs/to_raw.rb
69
+ - lib/draftjs_html/from_html.rb
70
+ - lib/draftjs_html/from_html/depth_stack.rb
71
+ - lib/draftjs_html/from_html/elements.rb
72
+ - lib/draftjs_html/from_html/pending_block.rb
73
+ - lib/draftjs_html/from_html/style_stack.rb
69
74
  - lib/draftjs_html/html_defaults.rb
70
75
  - lib/draftjs_html/html_depth.rb
71
76
  - lib/draftjs_html/node.rb