hitomalu_formatter 0.1.0 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -1
- data/Gemfile.lock +2 -2
- data/lib/hitomalu_formatter/formatter.rb +5 -3
- data/lib/hitomalu_formatter/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '04239c9c7d366f97db31ee7a67a15936ad16fe3900d0816b32409b9b6d944dec'
|
4
|
+
data.tar.gz: 6a26df5123e66d197952a98c30e1179a2d2c9c5620d07d1c5916aaddda9afd50
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b2999cc3946c94771090f582c649359fac349209fe5aa953173767f9ab1235ec366c4b5dcb6c8d6432411527694970aae15663a749a8358c5f8e90d4a9a57e2b
|
7
|
+
data.tar.gz: bc946c6f1cea4481ca105cd42dc6cef6993baaa64d76e1b58e9a9644fb5f1a9effb1dbf40a527a11f5c9f386f427e5bde117523b57ce2006ec96dcd30be531d7
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
hitomalu_formatter (0.1.0)
|
4
|
+
hitomalu_formatter (0.1.3)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
9
|
diff-lcs (1.5.0)
|
10
|
-
nokogiri (1.13.
|
10
|
+
nokogiri (1.13.8-x86_64-linux)
|
11
11
|
racc (~> 1.4)
|
12
12
|
racc (1.6.0)
|
13
13
|
rake (13.0.6)
|
@@ -6,7 +6,9 @@ module Hitomalu
|
|
6
6
|
|
7
7
|
# cf. https://developer.mozilla.org/ja/docs/Web/HTML/Inline_elements
|
8
8
|
# rp と rt は上に記載がないが、改行させたくないので追加
|
9
|
-
|
9
|
+
# node.name が 'comment' であるコメントノードもインライン扱い
|
10
|
+
INLINE_TAGS = [ 'a', 'abbr', 'acronym', 'audio', 'b', 'bdi', 'bdo', 'big', 'br', 'button', 'canvas', 'cite', 'code', 'comment', 'data', 'datalist', 'del', 'dfn', 'em', 'embed', 'i', 'iframe', 'img', 'input', 'ins', 'kbd', 'label', 'map', 'mark', 'meter', 'noscript', 'object', 'output', 'picture', 'progress', 'q', 'rp', 'rt', 'ruby', 's', 'samp', 'script', 'select', 'slot', 'small', 'span', 'strong', 'sub', 'sup', 'svg', 'template', 'textarea', 'time', 'u', 'tt', 'var', 'video', 'wbr' ]
|
11
|
+
INLINE_TAGS_REGEXP = INLINE_TAGS.join('|')
|
10
12
|
|
11
13
|
def self.format(html)
|
12
14
|
# 改行コード \n を \r\n に統一しておく
|
@@ -72,11 +74,11 @@ module Hitomalu
|
|
72
74
|
body_str = body.to_s.gsub(/(\|mykaigyo\|)+/, "\r\n").gsub('|myspace|', ' ').gsub(/(<\/wbr>|\|mykaramojiretsu\||\A<body>(\n|\r\n)*|(\n|\r\n)*<\/body>\Z)/, "").gsub(/(?<!\r)\n/, "\r\n")
|
73
75
|
|
74
76
|
# </span></div> のような閉じタグの間に改行が無かったら改行を入れる (Nokogiri の add_next_sibling は 20000回するとメモリを6GB以上使うので文字列処理でやる)
|
75
|
-
#
|
77
|
+
# ただし、片方または両方がインライン要素の閉じタグだったら入れない
|
76
78
|
prev_str = ""
|
77
79
|
while body_str != prev_str do
|
78
80
|
prev_str = body_str
|
79
|
-
body_str = body_str.gsub(/(<\/[a-z]+>)(<\/(?!
|
81
|
+
body_str = body_str.gsub(/(<\/(?!(#{INLINE_TAGS_REGEXP}))[a-z]+>)(<\/(?!(#{INLINE_TAGS_REGEXP}))[a-z]+>)/, "\\1\r\n\\3")
|
80
82
|
end
|
81
83
|
|
82
84
|
# 開始タグの前に改行がなければ改行を入れる (add_prev_sibling は重いので文字列処理でやる) (文章の先頭からは消す)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hitomalu_formatter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- t-hazawa
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-08-04 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: []
|