replace 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/bin/rep +41 -0
  3. data/lib/replace.rb +435 -0
  4. metadata +117 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 90dc2484ac3ec0eb152c877e9821e63b06cb22d1
4
+ data.tar.gz: a36909c6a332eed9fa64540170559800fb0839b8
5
+ SHA512:
6
+ metadata.gz: 9e54c6df5c3f74a40ef21113827366a9a804fc97bd369ae79e80ac5a46326cef10a5066f095d102283fc16d566f9dbb86c10560dde21cf5f74fa0abf1e3d727d
7
+ data.tar.gz: 158b0fcfd65555c09d6ac582851c0ddd4254e273d825713c02eb69d9a9a53b059246602e714501434a3a70d8ede2d53393d834b6333dd89c5ac68413ed6d8ca3
data/bin/rep ADDED
@@ -0,0 +1,41 @@
1
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
# rep: run one Replace transformation over a set of files, rewriting each
# file in place.
#
# Usage: rep [options] method [files]
#   method  name of a public method declared on Replace
#   files   one or more paths or glob patterns (defaults to '*.md')

require 'optparse'
require 'replace'

ROOT = File.dirname(__FILE__)

# The per-method help text is harvested from the comments in lib/replace.rb.
library_source = File.read(File.join(ROOT, '..', 'lib', 'replace.rb'))
method_comments = Replace.new(library_source).help

# Only methods declared on Replace itself are offered as commands; anything
# inherited from Object must not be reachable from the command line.
commands = Replace.instance_methods - Replace.superclass.instance_methods

OptionParser.new do |opts|
  banner_lines = ['Usage: rep [options] method [files]']
  commands.each do |name|
    banner_lines << name.to_s << method_comments[name].to_s
  end
  opts.banner = banner_lines.join("\n")
end.parse!

method = ARGV[0]
if method.nil?
  warn 'Usage: rep [options] method [files], try rep -h to get help.'
  exit(1)
end

# Validate once, before touching any file, and report failure via the exit
# status so that calling scripts can detect it.
unless commands.map(&:to_s).include?(method)
  warn "The object has no the method #{method}, try rep -h to get help."
  exit(1)
end

# Every argument after the method name is a path or glob pattern; the shell
# may already have expanded a pattern into several paths.
patterns = ARGV.length > 1 ? ARGV[1..-1] : ['*.md']

Dir[*patterns].uniq.each do |file|
  replace = Replace.new(File.read(file))
  replace.public_send(method)
  # File.chmod works on the path, so no file handle is left open.
  File.chmod(0644, file) unless File.writable?(file)
  File.write(file, replace.string)
  puts replace.scan unless replace.scan.nil?
end
data/lib/replace.rb ADDED
@@ -0,0 +1,435 @@
1
+ require 'yaml'
2
+ require 'pandoc-ruby'
3
+ require 'ropencc'
4
+
5
# Regexp-driven clean-up helpers for text files, mostly aimed at
# Chinese-language Markdown. The transformations edit the wrapped string in
# place; most of them return the receiver so that calls can be chained.
#
# Inside the blocks handed to the private +replace+ helper, self is the
# wrapped string and +s+ is a shorthand for String#gsub!.
class Replace
  attr_reader :string, :scan

  def initialize(string)
    # Everything below mutates the text in place, so take a private copy of
    # frozen input instead of failing later with a FrozenError.
    @string = string.frozen? ? string.dup : string
  end

  # Treat the wrapped string as Ruby source and map each method name (as a
  # Symbol) to the comment lines sitting directly above it. The wrapped
  # string is left untouched.
  def help
    method_comments = {}
    @string.scan(/((.*#.*\r?\n)*)\s*def\s+(\w+)/) do |comments, _last_line, name|
      method_comments[name.to_sym] = comments
    end
    method_comments
  end

  # Collect every run of word characters into +scan+.
  def scan_test
    @scan = @string.scan(/\w+/)
  end

  # Collect the targets of all href attributes into +scan+.
  def scan_url
    @scan = @string.scan(/href=['"](.*?)['"]/)
  end

  # Collect the paths of all Markdown images into +scan+.
  def scan_image
    @scan = @string.scan(/!\[.*?\]\(([^\s]+?)(?:\s+.*?)?\)/)
  end

  # Scan the list of notes and build the substitution dictionary used by
  # batch_replace. Leading blanks are stripped from every line first.
  # Returns a Hash of regexp source => footnote text.
  def scan_note
    del_head_blank
    note = {}
    @string.scan(/^(.*?)〔(.*?〕.*?)\r?\n/) do |key, value|
      # The marker is plain text, so escape it before it becomes part of a
      # regexp; only the surrounding carets are regexp syntax.
      note["\\^#{Regexp.escape(key)}\\^"] = value.sub(/〕/, ': ')
    end
    note
  end

  # For each regexp source => text pair, replace the first match with an
  # inline footnote holding the text.
  def batch_replace(regexps = {})
    regexps.each do |key, value|
      pattern = Regexp.new("\\G(.*?)#{key}", Regexp::MULTILINE)
      # Block form keeps backslashes in the footnote text literal instead of
      # having them interpreted as backreferences.
      @string.sub!(pattern) { "#{Regexp.last_match(1)} ^[#{value}] " }
    end
    self
  end

  # Turn ^marker^ references into inline footnotes, using the notes found
  # by scan_note.
  def footnote
    batch_replace(scan_note)
  end

  # Minimal demonstration: cc becomes dd and aa becomes bb.
  def simple
    replace(@string) do
      s(/cc/, 'dd')
      s(/aa/, 'bb')
    end
    self
  end

  # Convert the box-drawing output of the shell command tree to plain ASCII.
  # (verified, risk level: 0)
  def tree
    replace(@string) do
      s(/[│├]/, '|')
      s(/[└]/, '\\')
      s(/[─]/, '-')
    end
    self
  end

  # Shift the number of every image<N>.jpg reference down by one and pad it
  # to three digits.
  def rename
    replace(@string) do
      # The dot is escaped so that only a real ".jpg" extension matches.
      s(/!\[\]\(image(\d+)\.jpg\)/) do
        format('![](image%03d.jpg)', Regexp.last_match(1).to_i - 1)
      end
    end
    self
  end

  # Preparation step before running pandoc for LaTeX output.
  def pre_pandoc_for_latex
    title
  end

  # Clean-up step after running pandoc for LaTeX output.
  def post_pandoc_for_latex
    replace(@string) do
      s(/\{verbatim\}/, '{Verbatim}')
      s(/\\begin\{center\}\\rule\{(.*?)\}\{(.*?)\}\\end\{center\}/, '\newpage')
      s(/\s*\\footnote\{(.*?)\}\s*/, '\footnote{\1}')
      # Accept both the ASCII and the fullwidth colon as separator.
      s(/\\footnote\{(.*?)[:\u{FF1A}]\s*(.*?)\}/, '〔{\kaishu \1: \2}〕')
    end
    theorem
  end

  # Normalise a Markdown file, e.g. one converted from HTML.
  # (not verified, risk level: 4)
  # Runs: blank, del_line_break, punct2, code, add_line_break, format_markdown
  def standard
    blank.del_line_break.punct2.code.add_line_break.format_markdown
  end

  # Clean up the output of pdftotext. (not verified, risk level: 4)
  # Runs: paragraph, blank, del_line_break, chapter, list, punct2, add_line_break
  def pdftotext
    replace(@string) do
      # Drop lines that hold nothing but a page number.
      s(/^[[:blank:]]*[0-9]+[[:blank:]]*\r?\n/, '')
    end
    paragraph.blank.del_line_break.chapter.list.punct2.add_line_break
  end

  # Convert Chinese punctuation to English punctuation, starting a new line
  # after every sentence-ending mark.
  def punct1
    # Fullwidth mark => ASCII mark. The fullwidth characters are written as
    # escapes so that Unicode normalisation of this file cannot flatten them
    # into regexp metacharacters.
    sentence_marks = {
      "\u{FF1A}" => ':',
      "\u{FF1B}" => ';',
      "\u{3002}" => '.',
      "\u{FF1F}" => '?',
      "\u{FF01}" => '!'
    }
    replace(@string) do
      s(/\u{FF0C}/, ', ')
      # First the marks in the middle of a line, then those at its end.
      sentence_marks.each { |wide, ascii| s(/#{wide}([^\r\n])/, "#{ascii}\n\\1") }
      sentence_marks.each { |wide, ascii| s(/#{wide}\r?\n/, "#{ascii}\n") }
      s(/\u{FF08}/, ' (')
      s(/\u{FF09}/, ') ')
      # No blank between a closing parenthesis and a following comma or dot.
      s(/\) ([,.])/, ')\1')
    end
    self
  end

  # Convert Chinese punctuation to English punctuation.
  # (verified, risk level: 3, the Chinese marks may be wanted)
  # These Chinese marks are kept: 、《》〈〉【】〖〗〔〕
  # Finishes with ascii2, which converts the remaining fullwidth marks.
  def punct2
    replace(@string) do
      s(/。/, '.')
      s(/[“”]/, '"')
      s(/[‘’]/, "'")
      s(/──/, '---')
      s(/—/, '--')
    end
    ascii2
  end

  # Convert Taiwanese quotation marks to mainland ones.
  # (verified, risk level: 0)
  # Finishes with ascii2.
  def taiwan
    replace(@string) do
      s(/「/, '‘')
      s(/」/, '’')
      s(/『/, '“')
      s(/』/, '”')
    end
    ascii2
  end

  # Convert fullwidth ASCII variants (U+FF01..U+FF5E) to the plain ASCII
  # characters "!" to "~". (verified, risk level: 0)
  def ascii2
    replace(@string) do
      # Each fullwidth form sits exactly 0xFEE0 above its ASCII counterpart.
      s(/[\u{FF01}-\u{FF5E}]/) { |wide| (wide.ord - 0xFEE0).chr }
    end
    self
  end

  # Remove line breaks that are not needed.
  def del_line_break
    replace(@string) do
      # Known limitation: text with one Han character per line is only
      # partially joined, because matches cannot overlap.
      s(/(\p{Han})\r?\n(\p{Han})/, '\1\2')
      s(/(\p{Han})\r?\n([[:punct:]])/, '\1\2')
      s(/…{3,}(\r?\n)+/, '')
    end
    self
  end

  # Add line breaks and blanks where they are needed.
  def add_line_break
    replace(@string) do
      s(/(\p{Han})[[:blank:]]*([:,])[[:blank:]]*(\p{Han})/, '\1\2 \3')
      s(/(\p{Han})[[:blank:]]*([。.!?;])[[:blank:]]*(\p{Han})/, "\\1\\2\n\\3")
      s(/(\p{Han})[[:blank:]]*(\p{Ps})/, '\1 \2')
      s(/(\p{Pe})[[:blank:]]*(\p{Han})/, '\1 \2')
    end
    self
  end

  # Remove blanks between Han characters and insert a blank between Han
  # characters and digits or Latin letters. (verified, risk level: 3)
  # Finishes with del_head_blank and del_blank_line.
  def blank
    replace(@string) do
      # Known limitation: text with a blank after every Han character is
      # only partially joined, because matches cannot overlap.
      s(/(\p{Han})[[:blank:]]+(\p{Han})/, '\1\2')
      s(/(\p{Han})(\w)/, '\1 \2')
      s(/(\w)(\p{Han})/, '\1 \2')
    end
    del_head_blank.del_blank_line
  end

  # Remove blanks at the start of every line.
  # (verified, risk level: 3, the blanks may be Markdown indentation)
  # Lines that only look empty become really empty.
  def del_head_blank
    replace(@string) do
      s(/^[[:blank:]]+/, '')
    end
    self
  end

  # Remove blanks at the end of every line. (verified, risk level: 0)
  # Lines that only look empty become really empty.
  def del_tail_blank
    replace(@string) do
      s(/[[:blank:]]+\r?\n/, "\n")
    end
    self
  end

  # Collapse runs of blank lines. (verified, risk level: 0)
  # Finishes with del_tail_blank.
  def del_blank_line
    replace(@string) do
      s(/(^[[:blank:]]*\r?\n){2,}/, "\n")
    end
    del_tail_blank
  end

  # Rewrite figure references and strip directories from image paths.
  # (verified, risk level: 0)
  def image
    replace(@string) do
      s(/Insert\s(18333fig\d+)\.png\s*\n.*?\d{1,2}-\d{1,2}\. (.*)/, '![\2](\1-tn.png)')
      s(/!\[(.*?)\]\(\S*\/(\S*?)( ".*")?\)/, '![\1](\2)')
    end
    self
  end

  # Remove the page header (the first eleven lines) and the footer (from the
  # first line starting with "[«" to the end).
  def head_foot
    replace(@string) do
      s(/\A(^[^\r\n]*\r?\n){11}\s*/m, '')
      s(/^\[«.*?\z/m, '')
    end
    self
  end

  # Put one blank on each side of inline code that touches letters or
  # digits. (not verified, risk level: 4)
  # Finishes with jekyll_code.
  def code
    replace(@string) do
      s(/([[:alnum:]])`([^`]+?)`([[:alnum:]])/, '\1 `\2` \3')
    end
    jekyll_code
  end

  # Convert Jekyll highlight tags to fenced code blocks.
  # (verified, risk level: 0)
  def jekyll_code
    replace(@string) do
      s(/\s*\{%\s*highlight\s+(\w+)\s*%\}\s*/, "\n\n```{.\\1}\n")
      s(/\s*\{%\s*endhighlight\s*%\}\s*/, "\n```\n\n")
    end
    self
  end

  # Wrap paragraphs that start with a keyword such as THEOREM: in the LaTeX
  # environment of the same name, and turn PART: lines into \part commands.
  # (not verified, risk level: 2)
  def theorem
    replace(@string) do
      # NOTE(review): NNOTE looks like a typo for NOTE - confirm before
      # changing, since it decides which paragraphs are wrapped.
      s(/^(ASSUMPTION|DEFINITION|CONCLUSION|ALGORITHM|EXPERIMENT|EXAMPLE|REMARK|NNOTE|THEOREM|AXIOM|LEMMA|PROPERTY|COROLLARY|PROPOSITION|CLAIM|PROBLEM|QUESTION|CONJECTURE|PROOF|SOLUTION|ANSWER|ANALYSIS)[.:](.*?)(\n(?=\n)|\Z)/mi) do
        css_class = Regexp.last_match(1).downcase
        "\\begin{#{css_class}}\n#{Regexp.last_match(2).strip}\n\\end{#{css_class}}\n"
      end
    end
    replace(@string) do
      s(/^(PART)[.:](.*?)(\n(?=\n)|\Z)/mi) do
        "\\#{Regexp.last_match(1).downcase}{#{Regexp.last_match(2).strip}}\n"
      end
    end
    self
  end

  # Replace the YAML front matter with a level-one heading holding its
  # title; front matter without a title is simply removed.
  # (verified, risk level: 0)
  def title
    replace(@string) do
      s(/\A^-{3,}\r?\n(.*?)^-{3,}\r?\n/m) do
        # NOTE(review): YAML.load runs on file content; consider
        # YAML.safe_load if the files can come from an untrusted source.
        doc = YAML.load(Regexp.last_match(1))
        # Empty or non-mapping front matter has no title to show.
        doc.is_a?(Hash) && doc['title'] ? "# #{doc['title']}\n\n" : ''
      end
    end
    self
  end

  # Remove bold and italic markers.
  # (verified, risk level: 3, the markers may be wanted Markdown emphasis)
  def del_italics_and_bold
    replace(@string) do
      s(/([\W_]|^)(\*\*|__)(?=\S)([^\r]*?\S[\*_]*)\2([\W_]|$)/, '\1\3\4')
      s(/([\W_]|^)(\*|_)(?=\S)([^\r\*_]*?\S)\2([\W_]|$)/, '\1\3\4')
    end
    self
  end

  # Clean up a plain-text translated book: headings marked with dollar
  # signs become Markdown headings and PART: lines.
  # Finishes with del_head_blank.
  def foreign_literature
    replace(@string) do
      s(/\s*\n/, "\n\n")
      s(/\${4,}\s*/, '#### ')
      # Strip ideographic spaces and SUB control characters. ASCII blanks
      # must survive, because the patterns below depend on them.
      s(/[\u{3000}\u{001A}]/, '')
      s(/# [0-9]+.\s*/, '## ')
      s(/#### 第[^\r\n]+[卷部]\s*(.*)\s*\n/, "PART: \\1\n\n")
      s(/#### 第[^\r\n]+[章]\s*(.*)\s*\n/, "# \\1\n\n")
    end
    del_head_blank
  end

  # Clean up classical texts copied from gushiwen: drop the site suffix and
  # the author line. Finishes with del_head_blank.
  def ancient_literature
    replace(@string) do
      s(/_古诗文网/, '')
      # Accept both the ASCII and the fullwidth colon after the label.
      s(/作者[:\u{FF1A}].*\r?\n/, '')
    end
    del_head_blank
  end

  # Mark the start of a paragraph: an indentation of two or more blanks
  # becomes a line break. (verified, risk level: 0)
  def paragraph
    replace(@string) do
      s(/^[[:blank:]]{2,}/, "\n")
    end
    self
  end

  # Turn Chinese chapter numbering at the start of a line into PART: lines
  # and Markdown headings. (verified, risk level: 0)
  def chapter
    replace(@string) do
      s(/^第[一二三四五六七八九十]+[卷部篇]/, 'PART: ')
      s(/^第[一二三四五六七八九十]+[章]/, '# ')
      s(/^第[一二三四五六七八九十]+[节]/, '## ')
      s(/^[一二三四五六七八九十]+、/, '### ')
      s(/^\([一二三四五六七八九十]+\)/, '#### ')
    end
    self
  end

  # Put a tab after list markers at the start of a line.
  def list
    replace(@string) do
      # NOTE(review): the dot is unescaped, so this matches one digit plus
      # ANY character (e.g. the "20" of "2014"); confirm which delimiters
      # are intended before tightening it.
      s(/^(\d.)\s*/, "\\1\t")
      s(/^[●]\s*/, "-\t")
    end
    self
  end

  # Reformat Markdown by converting it to HTML and back with pandoc.
  def format_markdown
    markdown2html.html2markdown
  end

  # Convert the text from Markdown to HTML with pandoc.
  def markdown2html
    converter = PandocRuby.new(@string, from: :markdown, to: :html)
    @string = converter.convert('chapters', 'indented-code-classes' => 'sourceCode')
    self
  end

  # Convert the text from HTML to Markdown with pandoc.
  def html2markdown
    converter = PandocRuby.new(@string, from: :html, to: :markdown)
    @string = converter.convert('chapters', 'atx-headers', 'normalize', 'no-wrap')
    self
  end

  # Convert Traditional Chinese (Taiwan) to Simplified Chinese.
  # Requires: brew install opencc; sudo gem install ropencc
  def tw2s
    converter = Ropencc.open('tw2s.json')
    @string = converter.convert(@string)
    self
  end

  private

  # Evaluate the block with the given string as self and with +s+ available
  # as a shorthand for String#gsub!. Returns the string.
  def replace(string, &block)
    # The alias lives on this one string's singleton class, so no other
    # String object is affected.
    string.singleton_class.send(:alias_method, :s, :gsub!)
    string.instance_eval(&block)
    string
  end
end
metadata ADDED
@@ -0,0 +1,117 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: replace
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Henry He
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-12-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.10'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.10'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pandoc-ruby
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 1.0.0
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: 1.0.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: ropencc
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ version: 0.0.6
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ~>
81
+ - !ruby/object:Gem::Version
82
+ version: 0.0.6
83
+ description: replace file using Regexp.
84
+ email:
85
+ - henryhyn@163.com
86
+ executables:
87
+ - rep
88
+ extensions: []
89
+ extra_rdoc_files: []
90
+ files:
91
+ - bin/rep
92
+ - lib/replace.rb
93
+ homepage: https://github.com/henryhyn/replace
94
+ licenses:
95
+ - MIT
96
+ metadata: {}
97
+ post_install_message:
98
+ rdoc_options: []
99
+ require_paths:
100
+ - lib
101
+ required_ruby_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - '>='
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ required_rubygems_version: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '>='
109
+ - !ruby/object:Gem::Version
110
+ version: 1.3.6
111
+ requirements: []
112
+ rubyforge_project: replace
113
+ rubygems_version: 2.0.14
114
+ signing_key:
115
+ specification_version: 4
116
+ summary: replace file using Regexp
117
+ test_files: []