replace 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/bin/rep +41 -0
  3. data/lib/replace.rb +435 -0
  4. metadata +117 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 90dc2484ac3ec0eb152c877e9821e63b06cb22d1
4
+ data.tar.gz: a36909c6a332eed9fa64540170559800fb0839b8
5
+ SHA512:
6
+ metadata.gz: 9e54c6df5c3f74a40ef21113827366a9a804fc97bd369ae79e80ac5a46326cef10a5066f095d102283fc16d566f9dbb86c10560dde21cf5f74fa0abf1e3d727d
7
+ data.tar.gz: 158b0fcfd65555c09d6ac582851c0ddd4254e273d825713c02eb69d9a9a53b059246602e714501434a3a70d8ede2d53393d834b6333dd89c5ac68413ed6d8ca3
data/bin/rep ADDED
@@ -0,0 +1,41 @@
1
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
# rep: apply one Replace method to every file matching a glob.
#
# Usage: rep [options] method [files]
#   method - name of a public method declared on Replace
#   files  - glob pattern of the files to rewrite in place (default: *.md)

require 'optparse'
require 'replace'

# The help text is assembled from the comment lines that precede each method
# definition in lib/replace.rb, so the library source itself is read here.
ROOT = File.dirname(__FILE__)
source = File.read(File.join(ROOT, '..', 'lib', 'replace.rb'))
method_comments = Replace.new(source).help

# Only methods declared on Replace itself may be run from the command line.
commands = Replace.public_instance_methods(false)

OptionParser.new do |opts|
  banner = 'Usage: rep [options] method [files]'.dup
  commands.each do |name|
    banner << "\n#{name}"
    banner << "\n#{method_comments[name]}"
  end
  opts.banner = banner
end.parse!

method = ARGV[0]
files = ARGV.length > 1 ? ARGV[1] : '*.md'

# Validate the method name once, before touching any file, and report the
# failure on stderr with a non-zero exit status.
unless method && commands.map(&:to_s).include?(method)
  warn "The object has no method #{method}, try rep -h to get help."
  exit(1)
end

Dir[files].each do |file|
  replace = Replace.new(File.read(file))
  replace.public_send(method)
  # Make read-only files writable without leaving a File handle open.
  File.chmod(0644, file) unless File.writable?(file)
  File.write(file, replace.string)
  puts replace.scan unless replace.scan.nil?
end
data/lib/replace.rb ADDED
@@ -0,0 +1,435 @@
1
+ require 'yaml'
2
+ require 'pandoc-ruby'
3
+ require 'ropencc'
4
+
5
# Regex-driven clean-up toolkit for Markdown / plain-text documents.
#
# An instance wraps one document. Most public methods rewrite the wrapped
# text in place and return +self+ (directly or through the method they end
# with) so that they can be chained; the +scan_*+ methods instead store their
# matches in +scan+ and return them.
class Replace
  # string - the current (possibly rewritten) document text
  # scan   - result of the most recent scan_test / scan_url / scan_image call
  attr_reader :string, :scan

  # string - the document text. It is modified in place by the rewriting
  #          methods; a frozen string is copied first so that it can be edited.
  def initialize(string)
    @string = string.frozen? ? string.dup : string
  end

  # Treats the wrapped text as Ruby source and collects, for every +def+, the
  # run of lines containing '#' that immediately precedes it.
  #
  # Returns a Hash of method name (Symbol) => comment text (String).
  # The wrapped text is left untouched.
  def help
    method_comments = {}
    @string.scan(/((.*#.*\r?\n)*)\s*def\s+(\w+)/) do |comment, _last_line, name|
      method_comments[name.to_sym] = comment
    end
    method_comments
  end

  # Collects every run of word characters.
  def scan_test
    @scan = @string.scan(/\w+/)
  end

  # Collects the value of every href="..." / href='...' attribute.
  def scan_url
    @scan = @string.scan(/href=['"](.*?)['"]/)
  end

  # Collects the path of every Markdown image ![...](path "title").
  def scan_image
    @scan = @string.scan(/!\[.*?\]\(([^\s]+?)(?:\s+.*?)?\)/)
  end

  # Scans the note list and builds the replacement dictionary used by
  # #footnote. Each line of the form "KEY〔LABEL〕TEXT" yields an entry whose
  # key is a regex source matching the literal marker ^KEY^ and whose value is
  # "LABEL: TEXT". Leading blanks are stripped from every line first.
  def scan_note
    del_head_blank
    note = {}
    @string.scan(/^(.*?)〔(.*?〕.*?)\r?\n/) do |key, value|
      # The key is document text, not a pattern: escape regex metacharacters.
      note["\\^#{Regexp.escape(key)}\\^"] = value.sub(/〕/, ': ')
    end
    note
  end

  # Batch replacement: for each entry, only the first match is replaced.
  #
  # regexps - Hash of regex source => note text. The first match of each
  #           pattern is replaced by " ^[note text] ".
  def batch_replace(regexps = {})
    regexps.each do |key, value|
      pattern = Regexp.new("\\G(.*?)#{key}", Regexp::MULTILINE)
      # Block form: the note text is inserted verbatim, so backslashes in it
      # are not interpreted as back-references.
      @string.sub!(pattern) { "#{Regexp.last_match(1)} ^[#{value}] " }
    end
    self
  end

  # Turns ^KEY^ markers into inline footnotes using the note list found in the
  # same document (see #scan_note).
  def footnote
    batch_replace(scan_note)
  end

  # Minimal demonstration of the replacement DSL.
  def simple
    replace(@string) do
      s(/cc/, 'dd')
      s(/aa/, 'bb')
    end
    self
  end

  # Converts the box-drawing output of the shell command `tree` to ASCII
  # (verified, risk level: 0).
  def tree
    replace(@string) do
      s(/[│├]/, '|')
      s(/[└]/, '\\')
      s(/[─]/, '-')
    end
    self
  end

  # Renumbers ![](imageN.jpg) references to N - 1, zero-padded to 3 digits.
  def rename
    replace(@string) do
      # The dot before "jpg" is escaped so only a literal '.' matches.
      s(/!\[\]\(image(\d+)\.jpg\)/) do
        format('![](image%03d.jpg)', $1.to_i - 1)
      end
    end
    self
  end

  # Preparation step before running pandoc for LaTeX output.
  def pre_pandoc_for_latex
    title
  end

  # Clean-up of pandoc's LaTeX output, followed by #theorem.
  def post_pandoc_for_latex
    replace(@string) do
      s(/\{verbatim\}/, '{Verbatim}')
      s(/\\begin\{center\}\\rule\{(.*?)\}\{(.*?)\}\\end\{center\}/, '\newpage')
      s(/\s*\\footnote\{(.*?)\}\s*/, '\footnote{\1}')
      # Accept both the ASCII and the fullwidth colon between label and text
      # (the duplicated "[::]" class indicates the fullwidth form was lost).
      s(/\\footnote\{(.*?)[:\u{FF1A}]\s*(.*?)\}/, '〔{\kaishu \1: \2}〕')
    end
    theorem
  end

  # Normalizes a Markdown file / cleans up the result of an HTML conversion
  # (not verified, risk level: 4).
  def standard
    blank.del_line_break.punct2.code.add_line_break.format_markdown
  end

  # Cleans up the output of pdftotext (not verified, risk level: 4).
  def pdftotext
    replace(@string) do
      # Drop lines that contain nothing but a page number.
      s(/^[[:blank:]]*[0-9]+[[:blank:]]*\r?\n/, '')
    end
    paragraph.blank.del_line_break.chapter.list.punct2.add_line_break
  end

  # Converts Chinese punctuation to English punctuation, starting a new line
  # after sentence-level marks.
  #
  # The fullwidth characters are written as \u escapes: with plain ASCII
  # '?', '(' and ')' in their place the patterns are not valid regexes.
  def punct1
    replace(@string) do
      s(/\u{FF0C}/, ', ')
      s(/\u{FF1A}([^\r\n])/, ":\n\\1")
      s(/\u{FF1B}([^\r\n])/, ";\n\\1")
      s(/。([^\r\n])/, ".\n\\1")
      s(/\u{FF1F}([^\r\n])/, "?\n\\1")
      s(/\u{FF01}([^\r\n])/, "!\n\\1")
      s(/\u{FF1A}\r?\n/, ":\n")
      s(/\u{FF1B}\r?\n/, ";\n")
      s(/。\r?\n/, ".\n")
      s(/\u{FF1F}\r?\n/, "?\n")
      s(/\u{FF01}\r?\n/, "!\n")
      s(/\u{FF08}/, ' (')
      s(/\u{FF09}/, ') ')
      # No space between a closing parenthesis and a following comma/period.
      s(/\) ([,.])/, ')\1')
    end
    self
  end

  # Converts Chinese punctuation to English punctuation (verified, risk
  # level: 3 - Chinese punctuation may actually be wanted). Only the
  # ideographic full stop, curly quotes and dashes are handled here; fullwidth
  # ASCII forms are handled by #ascii2. Other CJK symbols such as
  # 、《》〈〉【】〖〗〔〕 are not touched.
  def punct2
    replace(@string) do
      s(/。/, '.')
      s(/[“”]/, '"')
      s(/[‘’]/, "'")
      s(/──/, '---')
      s(/—/, '--')
    end
    ascii2
  end

  # Converts Taiwan-style corner quotes to mainland-style curly quotes
  # (verified, risk level: 0), then applies #ascii2.
  def taiwan
    replace(@string) do
      s(/「/, '‘')
      s(/」/, '’')
      s(/『/, '“')
      s(/』/, '”')
    end
    ascii2
  end

  # Converts fullwidth ASCII forms (U+FF01..U+FF5E) to the corresponding
  # single-byte ASCII characters '!'..'~' (verified, risk level: 0).
  def ascii2
    replace(@string) do
      s(/([\u{FF01}-\u{FF5E}])/) do
        # The fullwidth block is the ASCII block shifted by 0xFEE0.
        [$1.ord - 0xFEE0].pack('U')
      end
    end
    self
  end

  # Removes unnecessary line breaks between Han characters.
  # Matches do not overlap, so text with every character on its own line
  # ("无\n法\n处\n理") is only partially joined by a single pass.
  def del_line_break
    replace(@string) do
      s(/(\p{Han})\r?\n(\p{Han})/, '\1\2')
      s(/(\p{Han})\r?\n([[:punct:]])/, '\1\2')
      s(/…{3,}(\r?\n)+/, '')
    end
    self
  end

  # Adds line breaks after sentence-ending punctuation between Han characters
  # and normalizes spacing around colons, commas and brackets.
  def add_line_break
    replace(@string) do
      s(/(\p{Han})[[:blank:]]*([:,])[[:blank:]]*(\p{Han})/, '\1\2 \3')
      s(/(\p{Han})[[:blank:]]*([。.!?;])[[:blank:]]*(\p{Han})/, "\\1\\2\n\\3")
      s(/(\p{Han})[[:blank:]]*(\p{Ps})/, '\1 \2')
      s(/(\p{Pe})[[:blank:]]*(\p{Han})/, '\1 \2')
    end
    self
  end

  # Removes blanks between Han characters and inserts a space between a Han
  # character and an adjacent word character (verified, risk level: 3), then
  # applies #del_head_blank and #del_blank_line.
  def blank
    replace(@string) do
      # Matches do not overlap, so "无 法 处 理" is only partially joined.
      s(/(\p{Han})[[:blank:]]+(\p{Han})/, '\1\2')
      s(/(\p{Han})(\w)/, '\1 \2')
      s(/(\w)(\p{Han})/, '\1 \2')
    end
    del_head_blank.del_blank_line
  end

  # Removes blanks at the start of every line (verified, risk level: 3 - the
  # blanks may be Markdown indentation). Lines consisting only of blanks
  # become truly empty.
  def del_head_blank
    replace(@string) do
      s(/^[[:blank:]]+/, '')
    end
    self
  end

  # Removes blanks at the end of every line and normalizes the line ending to
  # "\n" (verified, risk level: 0).
  def del_tail_blank
    replace(@string) do
      s(/[[:blank:]]+\r?\n/, "\n")
    end
    self
  end

  # Collapses runs of two or more blank lines into one (verified, risk
  # level: 0), then applies #del_tail_blank.
  def del_blank_line
    replace(@string) do
      s(/(^[[:blank:]]*\r?\n){2,}/, "\n")
    end
    del_tail_blank
  end

  # Normalizes image references (verified, risk level: 0): converts
  # "Insert 18333figNNNN.png" placeholders to Markdown images and strips the
  # directory part and title from existing Markdown image paths.
  def image
    replace(@string) do
      s(/Insert\s(18333fig\d+)\.png\s*\n.*?\d{1,2}-\d{1,2}\. (.*)/, '![\2](\1-tn.png)')
      s(/!\[(.*?)\]\(\S*\/(\S*?)( ".*")?\)/, '![\1](\2)')
    end
    self
  end

  # Removes page header and footer: the first 11 lines, and everything from
  # the first line starting with "[«" to the end of the text.
  def head_foot
    replace(@string) do
      s(/\A(^[^\r\n]*\r?\n){11}\s*/m, '')
      s(/^\[«.*?\z/m, '')
    end
    self
  end

  # Puts one space on each side of inline code that touches alphanumerics
  # (not verified, risk level: 4), then applies #jekyll_code.
  def code
    replace(@string) do
      s(/([[:alnum:]])`([^`]+?)`([[:alnum:]])/, '\1 `\2` \3')
    end
    jekyll_code
  end

  # Converts Jekyll {% highlight lang %} ... {% endhighlight %} blocks to
  # fenced code blocks (verified, risk level: 0).
  def jekyll_code
    replace(@string) do
      s(/\s*\{%\s*highlight\s+(\w+)\s*%\}\s*/, "\n\n```{.\\1}\n")
      s(/\s*\{%\s*endhighlight\s*%\}\s*/, "\n```\n\n")
    end
    self
  end

  # Wraps paragraphs that start with a keyword such as "THEOREM:" or "Proof."
  # in a LaTeX environment named after the lower-cased keyword, and turns
  # "PART: ..." paragraphs into \part{...} (not verified, risk level: 2).
  def theorem
    replace(@string) do
      # NOTE(review): "NNOTE" looks like a typo for "NOTE"; kept as found - confirm.
      s(/^(ASSUMPTION|DEFINITION|CONCLUSION|ALGORITHM|EXPERIMENT|EXAMPLE|REMARK|NNOTE|THEOREM|AXIOM|LEMMA|PROPERTY|COROLLARY|PROPOSITION|CLAIM|PROBLEM|QUESTION|CONJECTURE|PROOF|SOLUTION|ANSWER|ANALYSIS)[.:](.*?)(\n(?=\n)|\Z)/mi) do
        css_class = $1.downcase
        "\\begin{#{css_class}}\n#{$2.strip}\n\\end{#{css_class}}\n"
      end
    end
    replace(@string) do
      s(/^(PART)[.:](.*?)(\n(?=\n)|\Z)/mi) do
        "\\#{$1.downcase}{#{$2.strip}}\n"
      end
    end
    self
  end

  # Replaces a leading YAML front-matter block by a level-1 heading built from
  # its "title" entry (verified, risk level: 0). Front matter without a title,
  # or that is empty or not a mapping, is simply removed.
  def title
    replace(@string) do
      s(/\A^-{3,}\r?\n(.*?)^-{3,}\r?\n/m) do
        # SECURITY(review): YAML.load runs on document content; only use on
        # trusted files, or switch to a safe loader.
        doc = YAML.load($1)
        heading = doc['title'] if doc.is_a?(Hash)
        heading ? "# #{heading}\n\n" : ''
      end
    end
    self
  end

  # Strips Markdown bold and italic markers (verified, risk level: 3 - the
  # emphasis may be intentional).
  def del_italics_and_bold
    replace(@string) do
      s(/([\W_]|^)(\*\*|__)(?=\S)([^\r]*?\S[\*_]*)\2([\W_]|$)/, '\1\3\4')
      s(/([\W_]|^)(\*|_)(?=\S)([^\r\*_]*?\S)\2([\W_]|$)/, '\1\3\4')
    end
    self
  end

  # Converts a plain-text foreign-literature dump (headings marked with
  # "$$$$") to Markdown headings, then applies #del_head_blank.
  def foreign_literature
    replace(@string) do
      s(/\s*\n/, "\n\n")
      s(/\${4,}\s*/, '#### ')
      # Strip ideographic spaces (U+3000) and SUB control characters. ASCII
      # spaces must survive: the heading patterns below rely on them.
      s(/[\u{3000}\u{001A}]/, '')
      s(/# [0-9]+.\s*/, '## ')
      s(/#### 第[^\r\n]+[卷部]\s*(.*)\s*\n/, "PART: \\1\n\n")
      s(/#### 第[^\r\n]+[章]\s*(.*)\s*\n/, "# \\1\n\n")
    end
    del_head_blank
  end

  # Removes the site suffix and the author line from a page saved from
  # 古诗文网, then applies #del_head_blank.
  def ancient_literature
    replace(@string) do
      s(/_古诗文网/, '')
      # Accept both the ASCII and the fullwidth colon after "作者".
      s(/作者[:\u{FF1A}].*\r?\n/, '')
    end
    del_head_blank
  end

  # Marks paragraph starts: an indent of two or more blanks becomes a line
  # break (verified, risk level: 0).
  def paragraph
    replace(@string) do
      s(/^[[:blank:]]{2,}/, "\n")
    end
    self
  end

  # Converts Chinese chapter numbering at the start of a line into heading
  # markers (verified, risk level: 0).
  def chapter
    replace(@string) do
      s(/^第[一二三四五六七八九十]+[卷部篇]/, 'PART: ')
      s(/^第[一二三四五六七八九十]+[章]/, '# ')
      s(/^第[一二三四五六七八九十]+[节]/, '## ')
      s(/^[一二三四五六七八九十]+、/, '### ')
      s(/^\([一二三四五六七八九十]+\)/, '#### ')
    end
    self
  end

  # Normalizes list markers so that they are followed by a tab.
  def list
    replace(@string) do
      # NOTE(review): '.' is unescaped, so any character after the digit
      # matches (e.g. "1、" as well as "1."); kept as found - confirm intent.
      s(/^(\d.)\s*/, "\\1\t")
      s(/^[●]\s*/, "-\t")
    end
    self
  end

  # Round-trips the text through pandoc (Markdown -> HTML -> Markdown).
  def format_markdown
    markdown2html.html2markdown
  end

  # Converts the text from Markdown to HTML with pandoc.
  def markdown2html
    converter = PandocRuby.new(@string, from: :markdown, to: :html)
    @string = converter.convert('chapters', 'indented-code-classes' => 'sourceCode')
    self
  end

  # Converts the text from HTML to Markdown with pandoc.
  def html2markdown
    converter = PandocRuby.new(@string, from: :html, to: :markdown)
    @string = converter.convert('chapters', 'atx-headers', 'normalize', 'no-wrap')
    self
  end

  # Converts Traditional Chinese (Taiwan) to Simplified Chinese with OpenCC.
  # Setup: brew install opencc; sudo gem install ropencc
  def tw2s
    converter = Ropencc.open('tw2s.json')
    @string = converter.convert(@string)
    self
  end

  private

  # Evaluates the block with +string+ as self and +s+ available as an alias of
  # String#gsub!, so rules read like sed: s(/pattern/, 'replacement').
  #
  # +s+ must be a true alias rather than a Ruby wrapper method: gsub! sets
  # $~ / $1 in the frame of its direct Ruby caller, which is what lets the
  # rule blocks use $1, $2, ...
  #
  # Returns the (modified) string.
  def replace(string, &block)
    string.singleton_class.send(:alias_method, :s, :gsub!)
    string.instance_eval(&block)
    string
  end
end
metadata ADDED
@@ -0,0 +1,117 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: replace
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Henry He
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-12-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.10'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.10'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pandoc-ruby
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 1.0.0
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: 1.0.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: ropencc
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ version: 0.0.6
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ~>
81
+ - !ruby/object:Gem::Version
82
+ version: 0.0.6
83
+ description: replace file using Regexp.
84
+ email:
85
+ - henryhyn@163.com
86
+ executables:
87
+ - rep
88
+ extensions: []
89
+ extra_rdoc_files: []
90
+ files:
91
+ - bin/rep
92
+ - lib/replace.rb
93
+ homepage: https://github.com/henryhyn/replace
94
+ licenses:
95
+ - MIT
96
+ metadata: {}
97
+ post_install_message:
98
+ rdoc_options: []
99
+ require_paths:
100
+ - lib
101
+ required_ruby_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - '>='
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ required_rubygems_version: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '>='
109
+ - !ruby/object:Gem::Version
110
+ version: 1.3.6
111
+ requirements: []
112
+ rubyforge_project: replace
113
+ rubygems_version: 2.0.14
114
+ signing_key:
115
+ specification_version: 4
116
+ summary: replace file using Regexp
117
+ test_files: []