hitomalu_formatter 0.1.0 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -1
- data/Gemfile.lock +2 -2
- data/lib/hitomalu_formatter/formatter.rb +5 -3
- data/lib/hitomalu_formatter/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '04239c9c7d366f97db31ee7a67a15936ad16fe3900d0816b32409b9b6d944dec'
|
4
|
+
data.tar.gz: 6a26df5123e66d197952a98c30e1179a2d2c9c5620d07d1c5916aaddda9afd50
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b2999cc3946c94771090f582c649359fac349209fe5aa953173767f9ab1235ec366c4b5dcb6c8d6432411527694970aae15663a749a8358c5f8e90d4a9a57e2b
|
7
|
+
data.tar.gz: bc946c6f1cea4481ca105cd42dc6cef6993baaa64d76e1b58e9a9644fb5f1a9effb1dbf40a527a11f5c9f386f427e5bde117523b57ce2006ec96dcd30be531d7
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
hitomalu_formatter (0.1.0)
|
4
|
+
hitomalu_formatter (0.1.3)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
9
|
diff-lcs (1.5.0)
|
10
|
-
nokogiri (1.13.
|
10
|
+
nokogiri (1.13.8-x86_64-linux)
|
11
11
|
racc (~> 1.4)
|
12
12
|
racc (1.6.0)
|
13
13
|
rake (13.0.6)
|
data/lib/hitomalu_formatter/formatter.rb
CHANGED
@@ -6,7 +6,9 @@ module Hitomalu
|
|
6
6
|
|
7
7
|
# cf. https://developer.mozilla.org/ja/docs/Web/HTML/Inline_elements
|
8
8
|
# rp と rt は上に記載がないが、改行させたくないので追加
|
9
|
-
|
9
|
+
# node.name が 'comment' であるコメントノードもインライン扱い
|
10
|
+
INLINE_TAGS = [ 'a', 'abbr', 'acronym', 'audio', 'b', 'bdi', 'bdo', 'big', 'br', 'button', 'canvas', 'cite', 'code', 'comment', 'data', 'datalist', 'del', 'dfn', 'em', 'embed', 'i', 'iframe', 'img', 'input', 'ins', 'kbd', 'label', 'map', 'mark', 'meter', 'noscript', 'object', 'output', 'picture', 'progress', 'q', 'rp', 'rt', 'ruby', 's', 'samp', 'script', 'select', 'slot', 'small', 'span', 'strong', 'sub', 'sup', 'svg', 'template', 'textarea', 'time', 'u', 'tt', 'var', 'video', 'wbr' ]
|
11
|
+
INLINE_TAGS_REGEXP = INLINE_TAGS.join('|')
|
10
12
|
|
11
13
|
def self.format(html)
|
12
14
|
# 改行コード \n を \r\n に統一しておく
|
@@ -72,11 +74,11 @@ module Hitomalu
|
|
72
74
|
body_str = body.to_s.gsub(/(\|mykaigyo\|)+/, "\r\n").gsub('|myspace|', ' ').gsub(/(<\/wbr>|\|mykaramojiretsu\||\A<body>(\n|\r\n)*|(\n|\r\n)*<\/body>\Z)/, "").gsub(/(?<!\r)\n/, "\r\n")
|
73
75
|
|
74
76
|
# </span></div> のような閉じタグの間に改行が無かったら改行を入れる (Nokogiri の add_next_sibling は 20000回するとメモリを6GB以上使うので文字列処理でやる)
|
75
|
-
#
|
77
|
+
# ただし、片方または両方がインライン要素の閉じタグだったら入れない
|
76
78
|
prev_str = ""
|
77
79
|
while body_str != prev_str do
|
78
80
|
prev_str = body_str
|
79
|
-
body_str = body_str.gsub(/(<\/[a-z]+>)(<\/(?!
|
81
|
+
body_str = body_str.gsub(/(<\/(?!(#{INLINE_TAGS_REGEXP}))[a-z]+>)(<\/(?!(#{INLINE_TAGS_REGEXP}))[a-z]+>)/, "\\1\r\n\\3")
|
80
82
|
end
|
81
83
|
|
82
84
|
# 開始タグの前に改行がなければ改行を入れる (add_prev_sibling は重いので文字列処理でやる) (文章の先頭からは消す)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hitomalu_formatter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- t-hazawa
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-08-04 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: []
|