draftjs_html 0.14.0 → 0.16.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b708c9dfd4a3d4b6cdf22de377edd9a271e6eb527c70648d11337c1d090613e5
4
- data.tar.gz: e472de0d6b79d5b7cf7380d6ed89f8c85aedf79d30d329a42550b1cf94eb7dd7
3
+ metadata.gz: 770a91eea2ce6ff069f3ccf80d8d9857e15ea8882cfd7a426fe9b8aea2b6ca4e
4
+ data.tar.gz: 1dfef55358fb964b0a6ac6c98458b1d04a59c0e115c11a2d32a5f4042c747777
5
5
  SHA512:
6
- metadata.gz: 6511068eab5eb7c99f7815c85bb6b40bd998f80cee7ed8b41a418d9295bf467f4ed4a1de47049342aa27ec02e0e29207031f1f3cfa9e853da13acc79d98e21b1
7
- data.tar.gz: bd939bb048b11692d520cfef3202b7e9d7106f36cac901cd4171f82991ccf24981b8cc610f0e61626231c463c84202d5dbb80b26221cecf1f68b9f48b435b32a
6
+ metadata.gz: '0830e5b529717b67fd2dbdd2627c53e68332bbc66601e6b405ecd60d993aef9eca797211bfc442c95c6cf00d9996d65b22cdacd12982094b4d3c33f04e86f0c2'
7
+ data.tar.gz: 47f94740645254e51675033bcb1d23d331eaa054d0d73382d689e4d10e18441177622b8fb9d66d51008c06f3d432eda1809014fd541ae2f52fa1b5d5d5905c9b
data/Gemfile CHANGED
@@ -6,3 +6,5 @@ source "https://rubygems.org"
6
6
  gemspec
7
7
 
8
8
  gem "rake", "~> 13.0"
9
+ gem "pry-byebug", "~> 3.10"
10
+ gem 'simplecov', '~> 0.21'
data/README.md CHANGED
@@ -177,6 +177,44 @@ end
177
177
  # will nest HTML nodes as you probably want (e.g. "<p>hi!</p>")
178
178
  ```
179
179
 
180
+ ### FromHtml (beta)
181
+
182
+ As an experiment, this gem provides the ability to convert from HTML to raw
183
+ DraftJS JSON. You can explore this behavior with the following snippet:
184
+
185
+ ```ruby
186
+ DraftjsHtml.from_html("<p>Hello!</p>") # => { "blocks" => [{ "text" => "Hello!", "type" => "unstyled" } ] }
187
+ ```
188
+
189
+ There are some known limitations with this approach, but, if you're just trying
190
+ to get started, it may be good enough for you. Contributions and issue reports
191
+ are welcome and encouraged.
192
+
193
+ #### `:node_to_entity`
194
+
195
+ This `FromHtml` option allows the user to specify how a particular node is
196
+ converted to a DraftJS entity. By default, the library converts `img` and `a`
197
+ tags to `IMAGE` and `LINK` entities, respectively. If you specify this option,
198
+ you override the existing behavior and must define those conversions yourself.
199
+
200
+ The option expects a `callable` (`proc`, `lambda`, etc.) that receives 3 arguments:
201
+
202
+ - tagname (e.g. `a`) - always downcased
203
+ - content - the text content inside the tag
204
+ - HTML attributes - any HTML attributes on the tag as a Hash (string keys)
205
+
206
+ The callable should return a Hash with symbol keys, or `nil` to skip creating an entity for that node. The supported keys are:
207
+
208
+ - `type` (required)
209
+ - the entity "type" or name
210
+ - `mutability` (optional, default `'IMMUTABLE'`)
211
+ - either 'MUTABLE', 'IMMUTABLE', or 'SEGMENTED'
212
+ - `atomic` (optional, default `false`)
213
+ - when true, creates a new "atomic" block for this entity rather than applying
214
+ the entity to the current range
215
+ - `data` (optional, default `{}`)
216
+ - an arbitrary data-bag (Hash) of entity data
217
+
180
218
  ## Development
181
219
 
182
220
  After checking out the repo, run `bin/setup` to install dependencies. Then, run
@@ -14,8 +14,8 @@ module DraftjsHtml
14
14
  @entity_map = {}
15
15
  end
16
16
 
17
- def text_block(text)
18
- typed_block('unstyled', text)
17
+ def text_block(text, depth: 0)
18
+ typed_block('unstyled', text, depth: depth)
19
19
  end
20
20
 
21
21
  def typed_block(type, text, depth: 0)
@@ -40,6 +40,10 @@ module DraftjsHtml
40
40
  entity_range(key, range)
41
41
  end
42
42
 
43
+ def has_blocks?
44
+ @blocks.any?
45
+ end
46
+
43
47
  def to_h
44
48
  {
45
49
  'blocks' => @blocks,
@@ -47,6 +51,21 @@ module DraftjsHtml
47
51
  }
48
52
  end
49
53
 
54
+ def to_s
55
+ draftjs = DraftjsHtml::Draftjs.parse(to_h)
56
+ draftjs.blocks.reduce('') do |acc, block|
57
+ acc << "typed_block '#{block.type}', '#{block.text}', depth: #{block.depth}\n"
58
+ block.inline_styles.each do |style|
59
+ acc << "inline_style '#{style.name}', #{style.range.begin}..#{style.range.end} # (#{style.offset} + #{style.length})\n"
60
+ end
61
+ block.entity_ranges.each do |entity_range|
62
+ entity = draftjs.entity_map[entity_range.name]
63
+ acc << "apply_entity '#{entity.type}', #{entity_range.range.begin}..#{entity_range.range.end}\n"
64
+ end
65
+ acc
66
+ end
67
+ end
68
+
50
69
  private
51
70
 
52
71
  def deep_stringify_keys(object)
@@ -14,6 +14,7 @@ module DraftjsHtml
14
14
 
15
15
  def convert_block(block)
16
16
  {
17
+ 'key' => block.key,
17
18
  'text' => block.text,
18
19
  'type' => block.type,
19
20
  'depth' => block.depth,
@@ -0,0 +1,109 @@
1
+ module DraftjsHtml
2
+ class FromHtml < Nokogiri::XML::SAX::Document
3
+ class DepthStack
4
+ def initialize
5
+ @stack = []
6
+ @nodes = []
7
+ @list_depth = -1
8
+ @style_stack = StyleStack.new
9
+ end
10
+
11
+ def push(tagname, attrs)
12
+ @stack << PendingBlock.from_tag(tagname, attrs, @nodes.dup, @list_depth)
13
+ track_block_node(tagname)
14
+ end
15
+
16
+ def push_parent(tagname, attrs)
17
+ @list_depth += 1
18
+ track_block_node(tagname)
19
+ end
20
+
21
+ def pop_parent(tagname, draftjs)
22
+ @nodes.pop
23
+ blocks = []
24
+ while current.depth >= 0
25
+ blocks << @stack.pop
26
+ @nodes.pop
27
+ end
28
+ blocks.reverse_each do |pending_block|
29
+ pending_block.flush_to(draftjs, @style_stack)
30
+ pending_block.apply_entities_to(draftjs)
31
+ end
32
+ @list_depth -= 1
33
+ end
34
+
35
+ def pop(draftjs)
36
+ return if @stack.empty?
37
+ return if inside_parent?
38
+
39
+ if @nodes.last == current.tagname && current.flushable?
40
+ flush_to(draftjs)
41
+ elsif @stack[-2]
42
+ @stack[-2].consume(current)
43
+ end
44
+
45
+ @stack.pop
46
+ @nodes.pop
47
+ end
48
+
49
+ def create_pending_entity(tagname, attrs)
50
+ current.pending_entities << { tagname: tagname, start: current_character_offset + 1, attrs: attrs }
51
+ end
52
+
53
+ def convert_pending_entities(conversion)
54
+ while current.pending_entities.any?
55
+ pending_entity = current.pending_entities.pop
56
+ range = pending_entity[:start]..current_character_offset
57
+ content = current_text_buffer[range]
58
+ user_created_entity = conversion.call(pending_entity[:tagname], content, pending_entity[:attrs])
59
+ next unless user_created_entity
60
+
61
+ if content == '' && !user_created_entity[:atomic]
62
+ current.text_buffer << ' '
63
+ range = range.begin..(range.end+1)
64
+ end
65
+ current.entities << user_created_entity.merge(start: range.begin, finish: range.end)
66
+ end
67
+ end
68
+
69
+ def style_start(tagname)
70
+ @style_stack.track_start(tagname, current_character_offset + 1)
71
+ end
72
+
73
+ def style_end(tagname)
74
+ @style_stack.track_end(tagname, current_character_offset)
75
+ end
76
+
77
+ def flush_to(draftjs)
78
+ current.flush_to(draftjs, @style_stack)
79
+ current.apply_entities_to(draftjs)
80
+ end
81
+
82
+ def append_text(chars)
83
+ current.text_buffer << chars unless chars.empty?
84
+ end
85
+
86
+ private
87
+
88
+ def current_text_buffer
89
+ current.text_buffer.join
90
+ end
91
+
92
+ def current_character_offset
93
+ current.character_offset
94
+ end
95
+
96
+ def track_block_node(name)
97
+ @nodes << name
98
+ end
99
+
100
+ def inside_parent?
101
+ (FromHtml::LIST_PARENT_ELEMENTS & @nodes).any?
102
+ end
103
+
104
+ def current
105
+ @stack.last
106
+ end
107
+ end
108
+ end
109
+ end
@@ -0,0 +1,8 @@
1
+ module DraftjsHtml
2
+ class FromHtml < Nokogiri::XML::SAX::Document
3
+ INLINE_STYLE_ELEMENTS = HtmlDefaults::HTML_STYLE_TAGS_TO_STYLE.keys.freeze
4
+ LIST_PARENT_ELEMENTS = %w[ol ul table].freeze
5
+ INLINE_NON_STYLE_ELEMENTS = %w[a abbr cite font img output q samp span thead tbody td time var].freeze
6
+ BLOCK_CONTENT_ELEMENTS = %w[p dl h1 h2 h3 h4 h5 h6].freeze
7
+ end
8
+ end
@@ -0,0 +1,82 @@
1
+ module DraftjsHtml
2
+ class FromHtml < Nokogiri::XML::SAX::Document
3
+ PendingBlock = Struct.new(:tagname, :attrs, :chars, :entities, :pending_entities, :parent_tagnames, :depth, keyword_init: true) do
4
+ def self.from_tag(name, attrs, parent_tagnames, depth)
5
+ self.new(
6
+ tagname: name,
7
+ attrs: attrs,
8
+ entities: [],
9
+ chars: [],
10
+ pending_entities: [],
11
+ depth: depth,
12
+ parent_tagnames: parent_tagnames,
13
+ )
14
+ end
15
+
16
+ def text_buffer
17
+ self[:chars]
18
+ end
19
+
20
+ def clear_text_buffer
21
+ self[:chars] = []
22
+ end
23
+
24
+ def character_offset
25
+ text_buffer.join.length - 1
26
+ end
27
+
28
+ def flushable?
29
+ %w[OPENING ol ul li table].include?(parent_tagnames.last) ||
30
+ (parent_tagnames.last == 'div' && tagname != 'div')
31
+ end
32
+
33
+ def consume(other_pending_block)
34
+ self.text_buffer += other_pending_block.text_buffer
35
+ self.pending_entities += other_pending_block.pending_entities
36
+ self.entities += other_pending_block.entities
37
+ end
38
+
39
+ def flush_to(draftjs, styles)
40
+ if text_buffer.any?
41
+ chars.join.lines.each do |line|
42
+ draftjs.typed_block(block_name, line.chomp, depth: [depth, 0].max)
43
+ end
44
+
45
+ styles.each do |descriptor|
46
+ finish = descriptor[:finish] || character_offset
47
+ draftjs.inline_style(descriptor[:style], descriptor[:start]..finish)
48
+ end
49
+ end
50
+
51
+ clear_text_buffer
52
+ styles.clear_finished
53
+ end
54
+
55
+ def apply_entities_to(draftjs)
56
+ Array(entities).each do |entity|
57
+ range = entity[:start]..entity[:finish]
58
+ if entity[:atomic]
59
+ draftjs.typed_block('atomic', ' ', depth: [depth, 0].max)
60
+ range = 0..0
61
+ end
62
+
63
+ draftjs.apply_entity entity[:type], range, data: entity[:data], mutability: entity.fetch(:mutability, 'IMMUTABLE')
64
+ end
65
+ end
66
+
67
+ def block_name
68
+ stack = parent_tagnames.last == 'li' ? parent_tagnames.last(2) : parent_tagnames.last(1)
69
+ return 'ordered-list-item' if stack.first == 'ol'
70
+ return 'unordered-list-item' if stack.first == 'ul'
71
+
72
+ DraftjsHtml::HtmlDefaults::BLOCK_TYPE_TO_HTML.invert.fetch(tagname, 'unstyled')
73
+ end
74
+
75
+ private
76
+
77
+ def text_buffer=(other)
78
+ self[:chars] = other
79
+ end
80
+ end
81
+ end
82
+ end
@@ -0,0 +1,50 @@
1
+ module DraftjsHtml
2
+ class FromHtml < Nokogiri::XML::SAX::Document
3
+ class StyleStack
4
+ def initialize
5
+ @stack = []
6
+ end
7
+
8
+ def clear_finished
9
+ @stack.delete_if { !!_1[:finish] }
10
+ end
11
+
12
+ def each(&block)
13
+ @stack.reverse_each.group_by { _1[:tagname] }.each do |_, descriptors|
14
+ overlapping_ranges = find_overlapping_styles(descriptors)
15
+ widest_descriptor = overlapping_ranges.max_by { (_1[:start].._1[:finish]).size }
16
+
17
+ applicable_styles = descriptors - overlapping_ranges + [widest_descriptor].compact
18
+ applicable_styles.each(&block)
19
+ end
20
+ end
21
+
22
+ def track_start(tagname, current_character_offset)
23
+ style = DraftjsHtml::HtmlDefaults::HTML_STYLE_TAGS_TO_STYLE[tagname]
24
+ @stack.unshift({ tagname: tagname, style: style, start: current_character_offset })
25
+ end
26
+
27
+ def track_end(tagname, current_character_offset)
28
+ descriptor_index = @stack.find_index { _1[:tagname] == tagname && !_1[:finish] }
29
+ descriptor = @stack[descriptor_index]
30
+ descriptor[:finish] = current_character_offset
31
+ end
32
+
33
+ private
34
+
35
+ def find_overlapping_styles(descriptors)
36
+ descriptors.select do |candidate_a|
37
+ candidate_range = candidate_a[:start]..candidate_a[:finish]
38
+ (descriptors - [candidate_a]).any? do |other|
39
+ other_range = other[:start]..other[:finish]
40
+ range_overlaps?(candidate_range, other_range)
41
+ end
42
+ end
43
+ end
44
+
45
+ def range_overlaps?(candidate_range, other_range)
46
+ other_range.begin == candidate_range.begin || candidate_range.cover?(other_range.begin) || other_range.cover?(candidate_range.begin)
47
+ end
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,107 @@
1
+ require 'stringio'
2
+ require_relative 'html_defaults'
3
+ require_relative 'from_html/elements'
4
+ require_relative 'from_html/style_stack'
5
+ require_relative 'from_html/pending_block'
6
+ require_relative 'from_html/depth_stack'
7
+
8
+ module DraftjsHtml
9
+ class FromHtml < Nokogiri::XML::SAX::Document
10
+
11
+ def initialize(options = {})
12
+ @draftjs = Draftjs::RawBuilder.new
13
+ @parser = Nokogiri::HTML4::SAX::Parser.new(self)
14
+ @depth_stack = DepthStack.new
15
+ @options = ensure_options!(options.dup)
16
+ end
17
+
18
+ def convert(raw_html)
19
+ convert_io(StringIO.new(raw_html))
20
+ end
21
+
22
+ def convert_io(html_io)
23
+ @parser.parse(html_io)
24
+ @draftjs.to_h
25
+ end
26
+
27
+ def characters(str)
28
+ content = strip_unnecessary_trailing_space(str)
29
+ @depth_stack.append_text(content)
30
+ end
31
+
32
+ def end_element(name)
33
+ track_pending_entity_end(name)
34
+
35
+ case name
36
+ when 'br' then return
37
+ when 'html', 'body', *FromHtml::INLINE_NON_STYLE_ELEMENTS
38
+ when *FromHtml::INLINE_STYLE_ELEMENTS
39
+ track_inline_style_end(name)
40
+ when *FromHtml::LIST_PARENT_ELEMENTS
41
+ @depth_stack.pop_parent(name, @draftjs)
42
+ else
43
+ @depth_stack.pop(@draftjs)
44
+ end
45
+ end
46
+
47
+ def start_element(name, attrs = [])
48
+ attributes = Hash[attrs]
49
+
50
+ case name
51
+ when 'br'
52
+ @depth_stack.append_text("\n")
53
+ when 'html', 'body', *FromHtml::INLINE_NON_STYLE_ELEMENTS
54
+ when *FromHtml::INLINE_STYLE_ELEMENTS
55
+ track_inline_style_start(name)
56
+ when *FromHtml::LIST_PARENT_ELEMENTS
57
+ @depth_stack.push_parent(name, attrs)
58
+ else
59
+ @depth_stack.push(name, attributes)
60
+ end
61
+
62
+ track_pending_entity_start(name, attributes)
63
+ end
64
+
65
+ def start_document
66
+ @depth_stack.push('OPENING', {})
67
+ end
68
+
69
+ def end_document
70
+ @depth_stack.flush_to(@draftjs)
71
+ end
72
+
73
+ private
74
+
75
+ def track_inline_style_start(tagname)
76
+ @depth_stack.style_start(tagname)
77
+ end
78
+
79
+ def track_inline_style_end(tagname)
80
+ @depth_stack.style_end(tagname)
81
+ end
82
+
83
+ def strip_unnecessary_trailing_space(str)
84
+ str
85
+ .gsub(/(\n+[[:space:]]*$)|(^\n+)/, '')
86
+ .gsub(/(^[[:space:]]+$)/, ' ')
87
+ end
88
+
89
+ def track_pending_entity_start(tagname, attrs)
90
+ @depth_stack.create_pending_entity(tagname, attrs)
91
+ end
92
+
93
+ def track_pending_entity_end(name)
94
+ @depth_stack.convert_pending_entities(@options[:node_to_entity])
95
+ end
96
+
97
+ def ensure_options!(opts)
98
+ opts[:node_to_entity] ||= ->(tagname, _content, attrs) {
99
+ case tagname
100
+ when 'a' then { type: 'LINK', mutability: 'MUTABLE', data: attrs }
101
+ when 'img' then { type: 'IMAGE', mutability: 'IMMUTABLE', atomic: true, data: attrs }
102
+ end
103
+ }
104
+ opts
105
+ end
106
+ end
107
+ end
@@ -1,8 +1,8 @@
1
1
  module DraftjsHtml
2
2
  module HtmlDefaults
3
3
  BLOCK_TYPE_TO_HTML = {
4
- 'unstyled' => 'p',
5
4
  'paragraph' => 'p',
5
+ 'unstyled' => 'p',
6
6
  'header-one' => 'h1',
7
7
  'header-two' => 'h2',
8
8
  'header-three' => 'h3',
@@ -23,6 +23,18 @@ module DraftjsHtml
23
23
  'UNDERLINE' => 'u',
24
24
  }.freeze
25
25
 
26
+ HTML_STYLE_TAGS_TO_STYLE = {
27
+ 'b' => 'BOLD',
28
+ 'i' => 'ITALIC',
29
+ 'em' => 'ITALIC',
30
+ 'del' => 'STRIKETHROUGH',
31
+ 'u' => 'UNDERLINE',
32
+ 'strong' => 'BOLD',
33
+ 'small' => 'SMALL',
34
+ 'sub' => 'SUBSCRIPT',
35
+ 'sup' => 'SUPERSCRIPT',
36
+ }.freeze
37
+
26
38
  ENTITY_ATTRIBUTE_NAME_MAP = {
27
39
  'className' => 'class',
28
40
  'url' => 'href',
@@ -32,7 +44,7 @@ module DraftjsHtml
32
44
 
33
45
  ENTITY_CONVERSION_MAP = {
34
46
  'LINK' => ->(entity, content, *) {
35
- attributes = entity.data.slice('url', 'rel', 'target', 'title', 'className').each_with_object({}) do |(attr, value), h|
47
+ attributes = entity.data.slice('url', 'href', 'rel', 'target', 'title', 'className').each_with_object({}) do |(attr, value), h|
36
48
  h[ENTITY_ATTRIBUTE_NAME_MAP.fetch(attr, attr)] = value
37
49
  end
38
50
 
@@ -47,4 +59,4 @@ module DraftjsHtml
47
59
  }
48
60
  }.freeze
49
61
  end
50
- end
62
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module DraftjsHtml
4
- VERSION = "0.14.0"
4
+ VERSION = "0.16.0"
5
5
  end
data/lib/draftjs_html.rb CHANGED
@@ -4,6 +4,7 @@ require_relative "draftjs_html/version"
4
4
  require 'nokogiri'
5
5
  require_relative 'draftjs_html/draftjs'
6
6
  require_relative 'draftjs_html/to_html'
7
+ require_relative 'draftjs_html/from_html'
7
8
 
8
9
  module DraftjsHtml
9
10
  class Error < StandardError; end
@@ -11,4 +12,8 @@ module DraftjsHtml
11
12
  def self.to_html(raw_draftjs, options: {})
12
13
  ToHtml.new(options).convert(raw_draftjs)
13
14
  end
15
+
16
+ def self.from_html(html_str, options: {})
17
+ FromHtml.new(options).convert(html_str)
18
+ end
14
19
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: draftjs_html
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.0
4
+ version: 0.16.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - TJ Taylor
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-10-19 00:00:00.000000000 Z
11
+ date: 2022-11-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -66,6 +66,11 @@ files:
66
66
  - lib/draftjs_html/draftjs/entity_map.rb
67
67
  - lib/draftjs_html/draftjs/raw_builder.rb
68
68
  - lib/draftjs_html/draftjs/to_raw.rb
69
+ - lib/draftjs_html/from_html.rb
70
+ - lib/draftjs_html/from_html/depth_stack.rb
71
+ - lib/draftjs_html/from_html/elements.rb
72
+ - lib/draftjs_html/from_html/pending_block.rb
73
+ - lib/draftjs_html/from_html/style_stack.rb
69
74
  - lib/draftjs_html/html_defaults.rb
70
75
  - lib/draftjs_html/html_depth.rb
71
76
  - lib/draftjs_html/node.rb