draftjs_html 0.13.0 → 0.15.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 050fd09294a00e0826e07a04238cf197b5d8ff0bbe39afc643400ddda3222bcc
4
- data.tar.gz: a3fa4a939eb34470bfc9f3f0e00cccfbb28257a101bc32e8cdcbcd94a2246fac
3
+ metadata.gz: 7195ba4d0f0e659dfd6cb335ee461ca122d3db48f379e8c88c01000fbedefccb
4
+ data.tar.gz: 0ee5ec82ddcac4ae7acb99399a7fd6d52054d21f23abcb2a2ff61cca6bc2c9b7
5
5
  SHA512:
6
- metadata.gz: 9d9625eb7f4752c9f1f62a0cbf26f90d99835ec7dfd4ac133b4fdd7a67348b2aeb0c9e62b1187bc3b9a48ee3b0887fb24261e5f8daa139b3e570c7c1797ad71f
7
- data.tar.gz: 4be2ef23835b8d614ea53359d01650ce881404dd3cf69db9bc65b99063157dbee515aa6670d668b0d0f3a0817141a943457cc3bd11bfac3b24d2edac00fe423c
6
+ metadata.gz: 9ee9d2f946504b1c79b936d215edda2a17ec94970c356e2b59be224da471ace3c0389af24418a7c9d93b0d343b3d6681e4a8638e8bf83b7e776257f8f5babb2e
7
+ data.tar.gz: ebb33f7bfa4fd9a0ca6de27c4bd6a5ff8b24b4d0f480706edff7dbf1065ac89b28f4f6f075f06d9dcc03825c33e6e07df36166c4f1d333e5316b68a9b80454a1
data/Gemfile CHANGED
@@ -6,3 +6,5 @@ source "https://rubygems.org"
6
6
  gemspec
7
7
 
8
8
  gem "rake", "~> 13.0"
9
+ gem "pry-byebug", "~> 3.10"
10
+ gem 'simplecov', '~> 0.21'
data/README.md CHANGED
@@ -177,6 +177,44 @@ end
177
177
  # will nest HTML nodes as you probably want (e.g. "<p>hi!</p>")
178
178
  ```
179
179
 
180
+ ### FromHtml (beta)
181
+
182
+ As an experiment, this gem provides the ability to convert HTML to raw
183
+ DraftJS JSON. You can explore this behavior with the following snippet:
184
+
185
+ ```ruby
186
+ DraftjsHtml.from_html("<p>Hello!</p>") # => { "blocks" => [{ "text" => "Hello!", "type" => "unstyled" }] }
187
+ ```
188
+
189
+ There are some known limitations with this approach, but, if you're just trying
190
+ to get started, it may be good enough for you. Contributions and issue reports
191
+ are welcome and encouraged.
192
+
193
+ #### `:node_to_entity`
194
+
195
+ This `FromHtml` option allows the user to specify how a particular node is
196
+ converted to a DraftJS entity. By default, the library converts `img` and `a`
197
+ tags to `IMAGE` and `LINK` entities, respectively. If you specify this option,
198
+ you override the existing behavior and must define those conversions yourself.
199
+
200
+ The option expects a `callable` (`proc`, `lambda`, etc.) that receives 3 arguments:
201
+
202
+ - tagname (e.g. `a`) - always downcased
203
+ - content - the text content inside the tag
204
+ - HTML attributes - any HTML attributes on the tag as a Hash (string keys)
205
+
206
+ The callable should return a Hash with symbol keys. The supported keys are:
207
+
208
+ - `type` (required)
209
+ - the entity "type" or name
210
+ - `mutability` (optional, default `'IMMUTABLE'`)
211
+ - either 'MUTABLE', 'IMMUTABLE', or 'SEGMENTED'
212
+ - `atomic` (optional, default `false`)
213
+ - when true, creates a new "atomic" block for this entity rather than applying
214
+ the entity to the current range
215
+ - `data` (optional, default `{}`)
216
+ - an arbitrary data-bag (Hash) of entity data
217
+
180
218
  ## Development
181
219
 
182
220
  After checking out the repo, run `bin/setup` to install dependencies. Then, run
@@ -0,0 +1,84 @@
1
+ require 'securerandom'
2
+
3
+ module DraftjsHtml
4
+ module Draftjs
5
+ class RawBuilder
6
+ def self.build(&block)
7
+ instance = new
8
+ instance.instance_eval(&block)
9
+ instance.to_h
10
+ end
11
+
12
+ def initialize
13
+ @blocks = []
14
+ @entity_map = {}
15
+ end
16
+
17
+ def text_block(text, depth: 0)
18
+ typed_block('unstyled', text, depth: depth)
19
+ end
20
+
21
+ def typed_block(type, text, depth: 0)
22
+ @blocks << { 'key' => SecureRandom.urlsafe_base64(10), 'text' => text, 'type' => type, 'depth' => depth }
23
+ end
24
+
25
+ def inline_style(style_name, range)
26
+ (@blocks.last['inlineStyleRanges'] ||= []) << { 'style' => style_name, 'offset' => range.begin, 'length' => range.size }
27
+ end
28
+
29
+ def entity_range(key, range)
30
+ (@blocks.last['entityRanges'] ||= []) << { 'key' => key, 'offset' => range.begin, 'length' => range.size }
31
+ end
32
+
33
+ def apply_entity(type, range, data: {}, mutability: 'IMMUTABLE', key: SecureRandom.uuid)
34
+ @entity_map[key] = {
35
+ 'type' => type,
36
+ 'mutability' => mutability,
37
+ 'data' => deep_stringify_keys(data),
38
+ }
39
+
40
+ entity_range(key, range)
41
+ end
42
+
43
+ def has_blocks?
44
+ @blocks.any?
45
+ end
46
+
47
+ def to_h
48
+ {
49
+ 'blocks' => @blocks,
50
+ 'entityMap' => @entity_map,
51
+ }
52
+ end
53
+
54
+ def to_s
55
+ draftjs = DraftjsHtml::Draftjs.parse(to_h)
56
+ draftjs.blocks.reduce('') do |acc, block|
57
+ acc << "typed_block '#{block.type}', '#{block.text}', depth: #{block.depth}\n"
58
+ block.inline_styles.each do |style|
59
+ acc << "inline_style '#{style.name}', #{style.range.begin}..#{style.range.end} # (#{style.offset} + #{style.length})\n"
60
+ end
61
+ block.entity_ranges.each do |entity_range|
62
+ entity = draftjs.entity_map[entity_range.name]
63
+ acc << "apply_entity '#{entity.type}', #{entity_range.range.begin}..#{entity_range.range.end}\n"
64
+ end
65
+ acc
66
+ end
67
+ end
68
+
69
+ private
70
+
71
+ def deep_stringify_keys(object)
72
+ case object
73
+ when Hash
74
+ object.each_with_object({}) do |(key, value), result|
75
+ result[key.to_s] = deep_stringify_keys(value)
76
+ end
77
+ when Array then object.map { |e| deep_stringify_keys(e) }
78
+ else
79
+ object
80
+ end
81
+ end
82
+ end
83
+ end
84
+ end
@@ -14,6 +14,7 @@ module DraftjsHtml
14
14
 
15
15
  def convert_block(block)
16
16
  {
17
+ 'key' => block.key,
17
18
  'text' => block.text,
18
19
  'type' => block.type,
19
20
  'depth' => block.depth,
@@ -7,6 +7,7 @@ require_relative 'draftjs/block'
7
7
  require_relative 'draftjs/entity_map'
8
8
  require_relative 'draftjs/entity'
9
9
  require_relative 'draftjs/to_raw'
10
+ require_relative 'draftjs/raw_builder'
10
11
 
11
12
  module DraftjsHtml
12
13
  module Draftjs
@@ -0,0 +1,104 @@
1
+ module DraftjsHtml
2
+ class FromHtml < Nokogiri::XML::SAX::Document
3
+ class DepthStack
4
+ def initialize
5
+ @stack = []
6
+ @nodes = []
7
+ @list_depth = -1
8
+ @style_stack = StyleStack.new
9
+ end
10
+
11
+ def push(tagname, attrs)
12
+ @stack << PendingBlock.from_tag(tagname, attrs, @nodes.dup, @list_depth)
13
+ track_block_node(tagname)
14
+ end
15
+
16
+ def push_parent(tagname, attrs)
17
+ @list_depth += 1
18
+ track_block_node(tagname)
19
+ end
20
+
21
+ def pop_parent(tagname, draftjs)
22
+ @nodes.pop
23
+ blocks = []
24
+ while current.depth >= 0
25
+ blocks << @stack.pop
26
+ @nodes.pop
27
+ end
28
+ blocks.reverse_each do |pending_block|
29
+ pending_block.flush_to(draftjs, @style_stack)
30
+ pending_block.apply_entities_to(draftjs)
31
+ end
32
+ @list_depth -= 1
33
+ end
34
+
35
+ def pop(draftjs)
36
+ return if @stack.empty?
37
+ return if inside_parent?
38
+
39
+ if @nodes.last == current.tagname && current.flushable?
40
+ flush_to(draftjs)
41
+ elsif @stack[-2]
42
+ @stack[-2].consume(current)
43
+ end
44
+
45
+ @stack.pop
46
+ @nodes.pop
47
+ end
48
+
49
+ def create_pending_entity(tagname, attrs)
50
+ current.pending_entities << { tagname: tagname, start: current_character_offset + 1, attrs: attrs }
51
+ end
52
+
53
+ def convert_pending_entities(conversion)
54
+ while current.pending_entities.any?
55
+ pending_entity = current.pending_entities.pop
56
+ range = pending_entity[:start]..current_character_offset
57
+ user_created_entity = conversion.call(pending_entity[:tagname], current_text_buffer[range], pending_entity[:attrs])
58
+ next unless user_created_entity
59
+
60
+ current.entities << user_created_entity.merge(start: range.begin, finish: range.end)
61
+ end
62
+ end
63
+
64
+ def style_start(tagname)
65
+ @style_stack.track_start(tagname, current_character_offset + 1)
66
+ end
67
+
68
+ def style_end(tagname)
69
+ @style_stack.track_end(tagname, current_character_offset)
70
+ end
71
+
72
+ def flush_to(draftjs)
73
+ current.flush_to(draftjs, @style_stack)
74
+ current.apply_entities_to(draftjs)
75
+ end
76
+
77
+ def append_text(chars)
78
+ current.text_buffer << chars unless chars.empty?
79
+ end
80
+
81
+ private
82
+
83
+ def current_text_buffer
84
+ current.text_buffer.join
85
+ end
86
+
87
+ def current_character_offset
88
+ current.character_offset
89
+ end
90
+
91
+ def track_block_node(name)
92
+ @nodes << name
93
+ end
94
+
95
+ def inside_parent?
96
+ (FromHtml::LIST_PARENT_ELEMENTS & @nodes).any?
97
+ end
98
+
99
+ def current
100
+ @stack.last
101
+ end
102
+ end
103
+ end
104
+ end
@@ -0,0 +1,8 @@
1
+ module DraftjsHtml
2
+ class FromHtml < Nokogiri::XML::SAX::Document
3
+ INLINE_STYLE_ELEMENTS = HtmlDefaults::HTML_STYLE_TAGS_TO_STYLE.keys.freeze
4
+ LIST_PARENT_ELEMENTS = %w[ol ul table].freeze
5
+ INLINE_NON_STYLE_ELEMENTS = %w[a abbr cite font img output q samp span thead tbody td time var].freeze
6
+ BLOCK_CONTENT_ELEMENTS = %w[p dl h1 h2 h3 h4 h5 h6].freeze
7
+ end
8
+ end
@@ -0,0 +1,85 @@
1
+ module DraftjsHtml
2
+ class FromHtml < Nokogiri::XML::SAX::Document
3
+ PendingBlock = Struct.new(:tagname, :attrs, :chars, :entities, :pending_entities, :parent_tagnames, :depth, keyword_init: true) do
4
+ def self.from_tag(name, attrs, parent_tagnames, depth)
5
+ self.new(
6
+ tagname: name,
7
+ attrs: attrs,
8
+ entities: [],
9
+ chars: [],
10
+ pending_entities: [],
11
+ depth: depth,
12
+ parent_tagnames: parent_tagnames,
13
+ )
14
+ end
15
+
16
+ def text_buffer
17
+ self[:chars]
18
+ end
19
+
20
+ def clear_text_buffer
21
+ self[:chars] = []
22
+ end
23
+
24
+ def character_offset
25
+ text_buffer.join.length - 1
26
+ end
27
+
28
+ def flushable?
29
+ %w[OPENING ol ul li table].include?(parent_tagnames.last) ||
30
+ (parent_tagnames.last == 'div' && tagname != 'div')
31
+ end
32
+
33
+ def consume(other_pending_block)
34
+ self.text_buffer += other_pending_block.text_buffer
35
+ self.pending_entities += other_pending_block.pending_entities
36
+ self.entities += other_pending_block.entities
37
+ end
38
+
39
+ def flush_to(draftjs, styles)
40
+ if text_buffer.any?
41
+ chars.join.lines.each do |line|
42
+ draftjs.typed_block(block_name, line.strip, depth: [depth, 0].max)
43
+ end
44
+
45
+ styles.each do |descriptor|
46
+ finish = descriptor[:finish] || character_offset
47
+ draftjs.inline_style(descriptor[:style], descriptor[:start]..finish)
48
+ end
49
+ end
50
+
51
+ clear_text_buffer
52
+ styles.clear_finished
53
+ end
54
+
55
+ def apply_entities_to(draftjs)
56
+ Array(entities).each do |entity|
57
+ range = entity[:start]..entity[:finish]
58
+ if entity[:atomic]
59
+ draftjs.typed_block('atomic', ' ', depth: [depth, 0].max)
60
+ range = 0..1
61
+ elsif range.size < 1
62
+ draftjs.typed_block('atomic', ' ', depth: [depth, 0].max) unless draftjs.has_blocks?
63
+ range = (range.begin..range.end + 1)
64
+ end
65
+
66
+ draftjs.apply_entity entity[:type], range, data: entity[:data], mutability: entity.fetch(:mutability, 'IMMUTABLE')
67
+ end
68
+ end
69
+
70
+ def block_name
71
+ stack = parent_tagnames.last == 'li' ? parent_tagnames.last(2) : parent_tagnames.last(1)
72
+ return 'ordered-list-item' if stack.first == 'ol'
73
+ return 'unordered-list-item' if stack.first == 'ul'
74
+
75
+ DraftjsHtml::HtmlDefaults::BLOCK_TYPE_TO_HTML.invert.fetch(tagname, 'unstyled')
76
+ end
77
+
78
+ private
79
+
80
+ def text_buffer=(other)
81
+ self[:chars] = other
82
+ end
83
+ end
84
+ end
85
+ end
@@ -0,0 +1,50 @@
1
+ module DraftjsHtml
2
+ class FromHtml < Nokogiri::XML::SAX::Document
3
+ class StyleStack
4
+ def initialize
5
+ @stack = []
6
+ end
7
+
8
+ def clear_finished
9
+ @stack.delete_if { !!_1[:finish] }
10
+ end
11
+
12
+ def each(&block)
13
+ @stack.reverse_each.group_by { _1[:tagname] }.each do |_, descriptors|
14
+ overlapping_ranges = find_overlapping_styles(descriptors)
15
+ widest_descriptor = overlapping_ranges.max_by { (_1[:start].._1[:finish]).size }
16
+
17
+ applicable_styles = descriptors - overlapping_ranges + [widest_descriptor].compact
18
+ applicable_styles.each(&block)
19
+ end
20
+ end
21
+
22
+ def track_start(tagname, current_character_offset)
23
+ style = DraftjsHtml::HtmlDefaults::HTML_STYLE_TAGS_TO_STYLE[tagname]
24
+ @stack.unshift({ tagname: tagname, style: style, start: current_character_offset })
25
+ end
26
+
27
+ def track_end(tagname, current_character_offset)
28
+ descriptor_index = @stack.find_index { _1[:tagname] == tagname && !_1[:finish] }
29
+ descriptor = @stack[descriptor_index]
30
+ descriptor[:finish] = current_character_offset
31
+ end
32
+
33
+ private
34
+
35
+ def find_overlapping_styles(descriptors)
36
+ descriptors.select do |candidate_a|
37
+ candidate_range = candidate_a[:start]..candidate_a[:finish]
38
+ (descriptors - [candidate_a]).any? do |other|
39
+ other_range = other[:start]..other[:finish]
40
+ range_overlaps?(candidate_range, other_range)
41
+ end
42
+ end
43
+ end
44
+
45
+ def range_overlaps?(candidate_range, other_range)
46
+ other_range.begin == candidate_range.begin || candidate_range.cover?(other_range.begin) || other_range.cover?(candidate_range.begin)
47
+ end
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,107 @@
1
+ require 'stringio'
2
+ require_relative 'html_defaults'
3
+ require_relative 'from_html/elements'
4
+ require_relative 'from_html/style_stack'
5
+ require_relative 'from_html/pending_block'
6
+ require_relative 'from_html/depth_stack'
7
+
8
+ module DraftjsHtml
9
+ class FromHtml < Nokogiri::XML::SAX::Document
10
+
11
+ def initialize(options = {})
12
+ @draftjs = Draftjs::RawBuilder.new
13
+ @parser = Nokogiri::HTML4::SAX::Parser.new(self)
14
+ @depth_stack = DepthStack.new
15
+ @options = ensure_options!(options.dup)
16
+ end
17
+
18
+ def convert(raw_html)
19
+ convert_io(StringIO.new(raw_html))
20
+ end
21
+
22
+ def convert_io(html_io)
23
+ @parser.parse(html_io)
24
+ @draftjs.to_h
25
+ end
26
+
27
+ def characters(str)
28
+ content = strip_unnecessary_trailing_space(str)
29
+ @depth_stack.append_text(content)
30
+ end
31
+
32
+ def end_element(name)
33
+ track_pending_entity_end(name)
34
+
35
+ case name
36
+ when 'br' then return
37
+ when 'html', 'body', *FromHtml::INLINE_NON_STYLE_ELEMENTS
38
+ when *FromHtml::INLINE_STYLE_ELEMENTS
39
+ track_inline_style_end(name)
40
+ when *FromHtml::LIST_PARENT_ELEMENTS
41
+ @depth_stack.pop_parent(name, @draftjs)
42
+ else
43
+ @depth_stack.pop(@draftjs)
44
+ end
45
+ end
46
+
47
+ def start_element(name, attrs = [])
48
+ attributes = Hash[attrs]
49
+
50
+ case name
51
+ when 'br'
52
+ @depth_stack.append_text("\n")
53
+ when 'html', 'body', *FromHtml::INLINE_NON_STYLE_ELEMENTS
54
+ when *FromHtml::INLINE_STYLE_ELEMENTS
55
+ track_inline_style_start(name)
56
+ when *FromHtml::LIST_PARENT_ELEMENTS
57
+ @depth_stack.push_parent(name, attrs)
58
+ else
59
+ @depth_stack.push(name, attributes)
60
+ end
61
+
62
+ track_pending_entity_start(name, attributes)
63
+ end
64
+
65
+ def start_document
66
+ @depth_stack.push('OPENING', {})
67
+ end
68
+
69
+ def end_document
70
+ @depth_stack.flush_to(@draftjs)
71
+ end
72
+
73
+ private
74
+
75
+ def track_inline_style_start(tagname)
76
+ @depth_stack.style_start(tagname)
77
+ end
78
+
79
+ def track_inline_style_end(tagname)
80
+ @depth_stack.style_end(tagname)
81
+ end
82
+
83
+ def strip_unnecessary_trailing_space(str)
84
+ str
85
+ .gsub(/(\n+[[:space:]]*$)|(^\n+)/, '')
86
+ .gsub(/(^[[:space:]]+$)/, ' ')
87
+ end
88
+
89
+ def track_pending_entity_start(tagname, attrs)
90
+ @depth_stack.create_pending_entity(tagname, attrs)
91
+ end
92
+
93
+ def track_pending_entity_end(name)
94
+ @depth_stack.convert_pending_entities(@options[:node_to_entity])
95
+ end
96
+
97
+ def ensure_options!(opts)
98
+ opts[:node_to_entity] ||= ->(tagname, _content, attrs) {
99
+ case tagname
100
+ when 'a' then { type: 'LINK', mutability: 'MUTABLE', data: attrs }
101
+ when 'img' then { type: 'IMAGE', mutability: 'IMMUTABLE', atomic: true, data: attrs }
102
+ end
103
+ }
104
+ opts
105
+ end
106
+ end
107
+ end
@@ -1,8 +1,8 @@
1
1
  module DraftjsHtml
2
2
  module HtmlDefaults
3
3
  BLOCK_TYPE_TO_HTML = {
4
- 'unstyled' => 'p',
5
4
  'paragraph' => 'p',
5
+ 'unstyled' => 'p',
6
6
  'header-one' => 'h1',
7
7
  'header-two' => 'h2',
8
8
  'header-three' => 'h3',
@@ -23,6 +23,18 @@ module DraftjsHtml
23
23
  'UNDERLINE' => 'u',
24
24
  }.freeze
25
25
 
26
+ HTML_STYLE_TAGS_TO_STYLE = {
27
+ 'b' => 'BOLD',
28
+ 'i' => 'ITALIC',
29
+ 'em' => 'ITALIC',
30
+ 'del' => 'STRIKETHROUGH',
31
+ 'u' => 'UNDERLINE',
32
+ 'strong' => 'BOLD',
33
+ 'small' => 'SMALL',
34
+ 'sub' => 'SUBSCRIPT',
35
+ 'sup' => 'SUPERSCRIPT',
36
+ }.freeze
37
+
26
38
  ENTITY_ATTRIBUTE_NAME_MAP = {
27
39
  'className' => 'class',
28
40
  'url' => 'href',
@@ -47,4 +59,4 @@ module DraftjsHtml
47
59
  }
48
60
  }.freeze
49
61
  end
50
- end
62
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module DraftjsHtml
4
- VERSION = "0.13.0"
4
+ VERSION = "0.15.0"
5
5
  end
data/lib/draftjs_html.rb CHANGED
@@ -4,6 +4,7 @@ require_relative "draftjs_html/version"
4
4
  require 'nokogiri'
5
5
  require_relative 'draftjs_html/draftjs'
6
6
  require_relative 'draftjs_html/to_html'
7
+ require_relative 'draftjs_html/from_html'
7
8
 
8
9
  module DraftjsHtml
9
10
  class Error < StandardError; end
@@ -11,4 +12,8 @@ module DraftjsHtml
11
12
  def self.to_html(raw_draftjs, options: {})
12
13
  ToHtml.new(options).convert(raw_draftjs)
13
14
  end
15
+
16
+ def self.from_html(html_str, options: {})
17
+ FromHtml.new(options).convert(html_str)
18
+ end
14
19
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: draftjs_html
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.0
4
+ version: 0.15.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - TJ Taylor
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-10-13 00:00:00.000000000 Z
11
+ date: 2022-11-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -64,7 +64,13 @@ files:
64
64
  - lib/draftjs_html/draftjs/content.rb
65
65
  - lib/draftjs_html/draftjs/entity.rb
66
66
  - lib/draftjs_html/draftjs/entity_map.rb
67
+ - lib/draftjs_html/draftjs/raw_builder.rb
67
68
  - lib/draftjs_html/draftjs/to_raw.rb
69
+ - lib/draftjs_html/from_html.rb
70
+ - lib/draftjs_html/from_html/depth_stack.rb
71
+ - lib/draftjs_html/from_html/elements.rb
72
+ - lib/draftjs_html/from_html/pending_block.rb
73
+ - lib/draftjs_html/from_html/style_stack.rb
68
74
  - lib/draftjs_html/html_defaults.rb
69
75
  - lib/draftjs_html/html_depth.rb
70
76
  - lib/draftjs_html/node.rb