oa_templater 0.5.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,862 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'support/oa_regexes'
4
+ require 'support/formats'
5
+
6
+ require 'nkf'
7
+ require 'fileutils'
8
+ require 'date'
9
+ require 'sablon'
10
+ require 'yaml'
11
+ require 'csv'
12
+ require 'charlock_holmes'
13
+ require 'kakasi'
14
+
15
+ module OaTemplater
16
+ class OA
17
+ attr_accessor :outputfile
18
+ attr_reader :props
19
+
20
# Builds a templater for one Office Action source file.
#
# sourcefile - path of the OA file to read.
# casenumber - case number embedded in the generated output file name.
#
# The setup steps run in a fixed order: the source text has to be loaded
# before set_templates (which picks a template by inspecting @data), and
# init_instance_vars creates the @props/@scrapes hashes used afterwards.
def initialize(sourcefile, casenumber = '11110')
  @sourcefile, @casenumber = sourcefile, casenumber

  read_oa_data
  set_templates
  init_instance_vars
  set_reasons_file
  read_templatable_file # chunks of text for swapping
end
29
+
30
+ # require template files, not included because of NDA
31
# Registers the template/support files and selects the one matching the
# loaded document.
#
# options - Hash overriding any of the default paths (same keys as below).
#
# Every default lives in the same directory as this source file. After the
# merge, pick_template is called to choose the active template.
def set_templates(options = {})
  here = File.dirname(__FILE__)
  bundled = {
    kyozetsuriyu: 'default_riyu.docx',
    shinpankyozetsuriyu: 'default_shinpankyozetsuriyu.docx',
    kyozetsusatei: 'default_satei.docx',
    shinnen: 'default_shinnen.docx',
    shireisho: 'default_shireisho.docx',
    rejectamendments: 'default_rejectamendments.docx',
    examiners: 'examiners.txt'
  }
  defaults = Hash[bundled.map { |key, filename| [key, File.join(here, filename)] }]

  @templates = defaults.merge(options)
  pick_template
end
43
+
44
+ # require reason file, not included because of NDA
45
# Loads the table of rejection reasons into @reasons.
#
# r - path of the reasons YAML file (defaults to reasons.yml next to this
#     source file).
#
# parse_articles tests each entry with `line =~ a['japanese']`, which only
# works when that value is a Regexp. Since Psych 4 (Ruby 3.1+)
# YAML.load_file is a safe load that refuses Regexp objects, so the file is
# parsed with an explicit allow-list instead. Only Regexp and Symbol are
# permitted; any other custom class in the file is rejected.
def set_reasons_file(r = File.join(File.dirname(__FILE__), 'reasons.yml'))
  yaml = File.read(r, mode: 'r:bom|utf-8')
  @reasons = YAML.safe_load(yaml, permitted_classes: [Regexp, Symbol], aliases: true, filename: r)
end
48
+
49
+ # A bunch of set phrases that can be swapped out
50
# Loads the set phrases that can be swapped out into @templatables.
#
# r - path of the YAML file (defaults to support/templatable.yml next to
#     this source file).
#
# check_for_templatable_portion tests each entry with
# `line =~ a['detect']`, which only works when that value is a Regexp.
# Since Psych 4 (Ruby 3.1+) YAML.load_file is a safe load that refuses
# Regexp objects, so the file is parsed with an explicit allow-list
# instead. Only Regexp and Symbol are permitted.
def read_templatable_file(r = File.join(File.dirname(__FILE__), 'support', 'templatable.yml'))
  yaml = File.read(r, mode: 'r:bom|utf-8')
  @templatables = YAML.safe_load(yaml, permitted_classes: [Regexp, Symbol], aliases: true, filename: r)
end
53
+
54
# Scrapes the appeal drafting date (year/month/day captures) and stores it
# as :appeal_drafted in YYYY/MM/DD form. Nothing more is set when the
# pattern is not found.
def parse_appeal_drafted
  capture_the(:appeal_drafted, R_CAPTURE_APPEAL_DRAFTED) # year/month/day
  scraped = @scrapes[:appeal_drafted]
  set_prop(:appeal_drafted, format_date('%04u/%02u/%02u', scraped)) unless scraped.nil?
end
60
+
61
# Scrapes the drafting date (year/month/day captures) and stores it as
# :drafted in YYYY/MM/DD form. Nothing more is set when the pattern is not
# found.
def parse_drafted
  capture_the(:drafted, R_CAPTURE_DRAFTED) # year/month/day
  scraped = @scrapes[:drafted]
  set_prop(:drafted, format_date('%04u/%02u/%02u', scraped)) unless scraped.nil?
end
67
+
68
# Scrapes the mailing date, stores it as :mailing_date (YYYY/MM/DD) and
# derives the output file name from it.
#
# demarker - separator placed between the case number, the template name
#            and the date in the file name.
#
# @outputfile falls back to 'oa_template' when no mailing date is found.
def parse_mailing_date(demarker = '-')
  @outputfile = 'oa_template'
  capture_the(:mailing_date, R_CAPTURE_MAILING_DATE)
  mailed = @scrapes[:mailing_date]
  return if mailed.nil?

  stamp = format_date('%04u%02u%02u', mailed)
  @outputfile = "ALP#{@casenumber}#{demarker}#{@template_name}#{demarker}#{stamp}.docx"

  set_prop(:mailing_date, format_date('%04u/%02u/%02u', mailed))
end
77
+
78
# Scrapes the date of the previous Office Action referenced by a decision
# of rejection and sets the related props:
#
#   :satei_previous_oa - the date as YYYY/MM/DD
#   :and_amendments    - 'Remarks and Amendments' when R_AND_AMENDMENTS
#                        matches the document, otherwise 'Remarks'
#   :satei_reasons     - 'the reasons', or the formatted reason header
#                        captured by R_SATEI_REASONS
#
# Nothing beyond capture_the's own bookkeeping happens when the date
# pattern is not found.
def parse_satei_previous_oa
  capture_the(:satei_previous_oa, R_CAPTURE_PREVIOUS_OA)
  previous = @scrapes[:satei_previous_oa]
  return if previous.nil?

  set_prop(:satei_previous_oa, format_date('%04u/%02u/%02u', previous))

  # set "and Amendments"
  set_prop(:and_amendments, R_AND_AMENDMENTS =~ @data ? 'Remarks and Amendments' : 'Remarks')

  # default, kept in place in case formatting the captured header fails
  set_prop(:satei_reasons, 'the reasons')

  found = @data.match(R_SATEI_REASONS)
  # Use gsub, not gsub!: gsub! returns nil when the capture contains no
  # whitespace, which used to hand nil to format_headers.
  reasons = found.nil? ? '理由' : found[1].gsub(/\s+/, '')
  set_prop(:satei_reasons, reasons == '理由' ? 'the reasons' : format_headers(reasons))
end
94
+
95
# Scrapes the amendments date and stores it as :amendments_date in
# YYYY/MM/DD form. Nothing more is set when the pattern is not found.
def parse_amendments_date
  capture_the(:amendments_date, R_CAPTURE_AMENDMENTS_DATE)
  scraped = @scrapes[:amendments_date]
  set_prop(:amendments_date, format_date('%04u/%02u/%02u', scraped)) unless scraped.nil?
end
101
+
102
# Scrapes the retroactive filing date and stores it as :retroactive, already
# wrapped in its label line ("Filing Date (Retroactive Date)"). Nothing more
# is set when the pattern is not found.
def parse_retroactive
  capture_the(:retroactive, R_CAPTURE_RETROACTIVE)
  scraped = @scrapes[:retroactive]
  return if scraped.nil?

  labelled = format_date("\nFiling Date (Retroactive Date) \t%04u/%02u/%02u\n \n", scraped)
  set_prop(:retroactive, labelled)
end
108
+
109
# Scrapes the appeal number and stores it as :appeal_no in the form
# "<first capture>-<second capture>", both converted to half-width by NKF.
def parse_appeal_no
  capture_the(:appeal_no, R_CAPTURE_APPEAL_NO)
  hit = @scrapes[:appeal_no]
  return if hit.nil?

  halves = [hit[1], hit[2]].map { |part| NKF.nkf('-m0Z1 -w', part) }
  set_prop(:appeal_no, halves[0] + '-' + halves[1])
end
115
+
116
# Scrapes the application number quoted in a shireisho notice and stores it
# as :shireisho_num in the form "<first capture>-<second capture>", both
# converted to half-width by NKF.
def parse_shireisho_app
  capture_the(:shireisho_num, R_CAPTURE_SHIREISHO_APP)
  hit = @scrapes[:shireisho_num]
  return if hit.nil?

  halves = [hit[1], hit[2]].map { |part| NKF.nkf('-m0Z1 -w', part) }
  set_prop(:shireisho_num, halves[0] + '-' + halves[1])
end
122
+
123
# Scrapes the two-part code of a shireisho notice and stores it as :scode,
# the parts converted to half-width by NKF and separated by a space.
def parse_shireisho_code
  capture_the(:scode, R_CAPTURE_SHIREISHO_CODE)
  hit = @scrapes[:scode]
  return if hit.nil?

  halves = [hit[1], hit[2]].map { |part| NKF.nkf('-m0Z1 -w', part) }
  set_prop(:scode, halves[0] + ' ' + halves[1])
end
129
+
130
# Scrapes the application number and stores it as :app_no in the form
# "<first capture>-<second capture>", both converted to half-width by NKF.
# The regular pattern is tried first, then the appeal-format pattern.
def parse_app_no
  capture_the(:app_no, R_CAPTURE_APP_NO)
  # try for the appeal format if nothing came up
  capture_the(:app_no, R_CAPTURE_APPEAL_APP_NO) if @scrapes[:app_no].nil?

  hit = @scrapes[:app_no]
  return if hit.nil?

  halves = [hit[1], hit[2]].map { |part| NKF.nkf('-m0Z1 -w', part) }
  set_prop(:app_no, halves[0] + '-' + halves[1])
end
140
+
141
+ # definitely need to fix this up later, haha
142
# Scrapes the examiner's name and codes.
#
# do_examiner - when true, the name is looked up in the examiners CSV
#               (@templates[:examiners]); a row whose second column equals
#               " <last> <first>" supplies the value from its first column.
#               If no row matches, the name is romanized with Kakasi and
#               followed by the original characters. When false, :taro is
#               simply "<last> <first>" as scraped.
#
# Captures 1 and 2 are the name, captures 3 and 4 the codes; :code is always
# set from the latter once a name pattern matched.
def parse_examiner(do_examiner = false)
  capture_the(:taro, R_CAPTURE_TARO) # 1, 2 (codes are #3, 4)
  # if there was no normal examiner, try an appeal examiner
  capture_the(:taro, R_CAPTURE_APPEAL_TARO) if @scrapes[:taro].nil?

  scrape = @scrapes[:taro]
  return if scrape.nil?

  if do_examiner
    last, first = scrape[1], scrape[2]
    known = CSV.foreach(@templates[:examiners]).find do |row|
      NKF.nkf('-m0Z1 -w', row[1]).eql?(' ' + last + ' ' + first)
    end

    if known
      set_prop(:taro, known[0])
    else
      # use kakasi to romajify the Examiner names
      romaji_first = Kakasi.kakasi('-Ja', first).capitalize
      romaji_last = Kakasi.kakasi('-Ja', last).upcase
      set_prop(:taro, "#{romaji_first} #{romaji_last} #{scrape[1]} #{scrape[2]}")
    end
  else
    set_prop(:taro, scrape[1] + ' ' + scrape[2])
  end

  # always set examiners numbers
  set_prop(:code, NKF.nkf('-m0Z1 -w', scrape[3]) + ' ' + NKF.nkf('-m0Z1 -w', scrape[4]))
end
179
+
180
# Scrapes the appeal examiner's name (captures 1 and 2) and stores it as
# :appeal_taro, the two parts separated by a space.
def parse_appeal_examiner
  capture_the(:appeal_taro, R_CAPTURE_APPEAL_TARO) # 1, 2
  hit = @scrapes[:appeal_taro]
  set_prop(:appeal_taro, hit[1] + ' ' + hit[2]) unless hit.nil?
end
186
+
187
# Detects the "<<<< 最後 >>>>" (final) banner. :reason_for_final always
# starts out empty; when the banner is present :final_oa becomes the English
# banner and :reason_for_final the FINALWML WordML content.
def parse_final_oa
  set_prop(:reason_for_final, '')
  capture_the(:final_oa, /<<<<\p{Z}+最\p{Z}{0,6}後\p{Z}+>>>>/)

  unless @scrapes[:final_oa].nil?
    set_prop(:final_oa, "\n<<<< FINAL >>>>\n")
    set_prop(:reason_for_final, Sablon.content(:word_ml, FINALWML))
  end
end
194
+
195
# Sets :see_list to the "See the List of Citations" note when the document
# contains the corresponding Japanese sentence, and to '' otherwise.
def parse_see_list
  note =
    if /引用文献等については引用文献等一覧参照/ =~ @data
      " \n(See the List of Citations for the cited publications) \n"
    else
      ''
    end
  set_prop(:see_list, note)
end
198
+
199
# Sets :response_period to '60 days' when R_RESPONSE_PERIOD matches the
# document and to 'three months' otherwise.
def parse_response_period
  period = R_RESPONSE_PERIOD =~ @data ? '60 days' : 'three months'
  set_prop(:response_period, period)
end
202
+
203
# Scrapes the representative named after "代理人" and stores the romanized
# full name as :our_lawyer. Only the family name (capture 1) is looked at;
# an unknown family name yields the placeholder 'Taro TOKKYO'.
def parse_our_lawyer
  capture_the(:our_lawyer, /[特許出願人]*代理人[弁理士]*[弁護士]*\p{Zs}+(\S+?)\p{Zs}(\S+?)/)
  hit = @scrapes[:our_lawyer]
  return if hit.nil?

  # only check last name
  romanized = {
    '村山' => 'Yasuhiko MURAYAMA',
    '志賀' => 'Masatake SHIGA',
    '佐伯' => 'Yoshifumi SAEKI',
    '渡邊' => 'Takashi WATANABE',
    '実広' => 'Shinya JITSUHIRO',
    '棚井' => 'Sumio TANAI'
  }
  set_prop(:our_lawyer, romanized.fetch(hit[1], 'Taro TOKKYO'))
end
225
+
226
# Sets :note_to_applicant to the fixed English "Request to the Applicant"
# paragraph when the document contains the phrase 本願出願時に公開されており、.
def parse_note_to_applicant
  capture_the(:note_to_applicant, /本願出願時に公開されており、/)
  unless @scrapes[:note_to_applicant].nil?
    request = "\t• Request to the Applicant\r\n\tCitation 1 was already published at the time of filing of the present application and has a common applicant or inventor with the present application. Citation 1 alone would be a bar to the novelty or inventive step of more than one claim of the present application.\r\n\tBased on this type of citation, appropriately evaluating the invention in advance can be thought to be beneficial to the applicant while creating appropriate claims, as well as helpful to the Examiner for an efficient and accurate examination. We request that the applicant disclose this type of citation that the applicant is already aware of when filing the application or a request for examination, as well as requesting that the applicant evaluates whether or not the invention for which a patent is sought has patentability based on this type of citation. "
    set_prop(:note_to_applicant, request)
  end
end
232
+
233
# Sets :currently_known, the closing remark about claims without reasons for
# rejection:
#
# * document mentions 拒絶の理由を発見しない請求項 - the long notice, naming
#   the claims captured by R_CAPTURE_NO_REJECT_CLAIMS, or "the claims" when
#   that pattern does not match;
# * document mentions 拒絶の理由が通知される - the short notice;
# * otherwise - an empty string.
def parse_currently_known
  notice =
    if @data =~ /拒絶の理由を発見しない請求項/
      listed = @data.match(R_CAPTURE_NO_REJECT_CLAIMS)
      subject = listed ? format_headers(listed[1]) : 'the claims'
      "<Claims for which no reasons for rejection have been found>\r\n \tNo reasons for rejection are currently known for #{subject} which were not indicated in this Notice of Reasons for Rejection. The applicant will be notified of new reasons for rejection if such reasons for rejection are found."
    elsif @data =~ /拒絶の理由が通知される/
      'The applicant will be notified of new reasons for rejection if such reasons for rejection are found.'
    else
      ''
    end

  set_prop(:currently_known, notice)
end
247
+
248
# Extracts the "fields searched" (IPC / database names) part of the prior
# art search record and stores it as :ipc_list ('' when absent). When an IPC
# list is found, the references that follow it are handed to
# parse_ipc_references, starting at the end of the matched section.
def parse_ipc
  ipc_text = ''

  if (section = @data.match(/先行技術文献(?:等{0,1})調査結果(.*?)..先行技術文献/m))
    data = section[1]
    ipc_list_end = section.end(0)

    # The text has not been through NKF yet at this point, so accept the
    # letters in either half-width or full-width form.
    if (ipc = data.match(/[II][PP][CC]/))
      data = data[ipc.begin(0)..-2]
      ipc_text = NKF.nkf('-m0Z1 -w', data)
                    .gsub('IPC', 'IPC:')
                    .gsub('DB名', "\tDB Name:")
                    # The indent pattern must be a Regexp: as a String it was
                    # searched for literally and never matched.
                    .gsub(/^\p{Z}{3,8}/, "\t ")
      parse_ipc_references(ipc_list_end)
    end
  end

  set_prop(:ipc_list, ipc_text)
end
263
+
264
# Builds the numbered list of references that follows the IPC list in the
# prior art search record and stores it as :ipc_reference_text.
#
# ipc_list_end - offset into @data where the IPC section ended; scanning
#                starts there and stops at the next "先行技術文献調査結果"
#                line or at "この拒絶理由通知の内容".
#
# Each line is tried against every citation pattern in the citations file
# (loaded once into @cits). Lines that match are rendered through
# convert_pub_no; lines that do not are copied over in half-width form. The
# prop is set to '' when no end marker is found.
def parse_ipc_references(ipc_list_end)
  ipc_reference_text = ''
  data = @data[ipc_list_end..-1]

  if m = data.match(/(^.*先行技術文献調査結果|この拒絶理由通知の内容)/)
    @cits ||= YAML.load_file(CITATIONS_FILE)
    # NOTE(review): the inclusive range keeps the first character of the end
    # marker as well - confirm whether that is intended.
    data = data[0..m.begin(0)]
    oldmatch = false # whether the previous line matched a citation pattern
    count = 1

    data.each_line do |line|
      match = false
      @cits.each do |_n, a|
        if m = line.match(a['japanese'])
          match = true
          ipc_reference_text += "#{count}. #{convert_pub_no(m, a['english'])}\n"
        end
      end # cits.each

      unless match
        # if no match, change full-width characters to half-width
        line = NKF.nkf('-m0Z1 -w', line)

        # first unmatched line after a matched one: give it its own number
        if oldmatch and (!match)
          line.gsub!(/^/, "#{count}. ") if line.length > 4
        end

        # second or later unmatched line in a row: continuation of the
        # previous entry
        if (!oldmatch) and (!match)
          count -= 1 # decrease count so that it stays the same after being increased below
          # remove newlines since it's probably a big english title
          ipc_reference_text.gsub!(/\r\n$/,"\n") if line.length > 4
        end

        line = pad_spaces(line)

        ipc_reference_text += line
      end

      # increase count
      count += 1

      oldmatch = match
    end
  end

  set_prop(:ipc_reference_text, ipc_reference_text)
end
313
+
314
# Translates the header lines of the reasons section (reason numbers, claim
# lists, citation lists, notes, ...) and stores them as :oa_headers, wrapped
# as WordML content.
#
# dh - when falsy nothing is parsed and no prop is set.
#
# The section is located with one large alternation that anchors on the
# start of the reasons and on one of several possible end markers. It is
# then consumed line by line: a line that starts a templatable phrase is
# replaced by that phrase's text, a line matching R_HEADER_TYPES is
# formatted with format_headers, and everything else is dropped.
def parse_headers(dh)
  oa_headers_text = ''

  if dh and m = @data.match(/(?:理\p{Z}{0,2}由.*^\p{Z}*先行技術文献調査結果の記録|理\p{Z}{0,2}由.*^-----------|\p{Z}記\p{Z}.*引\p{Z}?用\p{Z}?文\p{Z}?献\p{Z}?等\p{Z}?一\p{Z}?覧|\p{Z}記\p{Z}.*^-----------|理\p{Z}{0,2}由.*引\p{Z}?用\p{Z}?文\p{Z}?献\p{Z}?等\p{Z}?一\p{Z}?覧|理\p{Z}{0,2}由.*最後の拒絶理由通知とする理由|検討しましたが.*|\p{Z}理\p{Z}{0,2}由.*この通知に関するお問い合わせがございましたら)/mi)
    # gsub to strip HTML tags from japanese text and normalize line endings
    tdata = @data[m.begin(0)..m.end(0)].gsub(%r{</?[^>]+?>}, '').gsub("\r\n", "\n")
    #
    # matches stuff like this
    # (理由1)
    # <請求項1-11>
    # ・引用文献1
    # 引用文献1
    # 引用文献:1
    # 備考

    line = ''
    until tdata.nil? do
      # take the next line off the front of tdata
      m = tdata.match(/^(.*?)\n/)
      break if m.nil?

      line = m[1] =~ /^\s*$/ ? "\n" : m[1] # save a newline if it's empty

      tdata = tdata[m.end(0)..-1]

      if res = check_for_templatable_portion(line, tdata)
        # a set phrase was recognized: use its text and continue after it
        tex, tdata = res
        # added a match against unnecessary IPC lines
        oa_headers_text += tex
      elsif m = line.match(R_HEADER_TYPES)
        tex = m[1]

        oa_headers_text += tex =~ /^\s*$/ ? "\n" : format_headers(tex) + "\n" unless mistaken_header?(tex)
      end
    end

    # collapse runs of blank lines
    3.times { oa_headers_text.gsub!("\n\n\n", "\n") }

    # escape XML special characters before the WordML markers are inserted
    oa_headers_text.encode!(:xml => :text)
    # replace OAOA_TEMPLATER_TAB with word_ml newline + tab
    oa_headers_text.gsub!(/OAOA_TEMPLATER_TAB/, STOPSTARTTAB)
    # replace newlines with word_ml newlines
    oa_headers_text.gsub!(/\n/, STOPSTARTP)


    set_prop(:oa_headers, Sablon.content(:word_ml, sprintf(HEADERS_FMT, oa_headers_text)))
  end
end
361
+
362
# Checks whether a set phrase from the templatables table starts on `line`.
#
# line  - the current line of the reasons section.
# tdata - the text that follows `line`.
#
# For each templatable whose 'detect' pattern matches `line`, its 'full'
# pattern is tried against `line` + `tdata` with all newlines removed. On a
# match the phrase's 'text' is filled in: each "\N" placeholder is replaced
# by capture N, formatted as a claim list, as "the invention according to
# ..." or simply converted to half-width.
#
# Returns [translated_text, remaining_tdata] on success, nil otherwise.
def check_for_templatable_portion(line, tdata)
  odata = [line, tdata].join.gsub("\n", '') # unshift the first line back to tdata
  @templatables.each do |t, a|
    if line =~ a['detect']
      if m = odata.match(a['full'])
        # this starting offset should actually be m.end(0) - line.length + (the number of newline characters up to the match)
        tdata = [line, "\n", tdata].join
        tdata = tdata[m.end(0) .. -1]
        # NOTE(review): the inclusive range takes one character beyond the
        # match - confirm whether that is intended.
        tex = odata[m.begin(0)..m.end(0)]
        tex.gsub!(a['full']) do
          res = a['text']
          Regexp.last_match.captures.each_with_index do |match, i|
            if match =~ /^請求項[\p{N},~、-及びおよ]+$/
              res = res.gsub(/\\#{i+1}/, format_headers(match))
            elsif match =~ /^請求項[\p{N},~、-及びおよ]+に係る発明$/
              res = res.gsub(/\\#{i+1}/, format_invention_according_to(match))
            else
              res = res.gsub(/\\#{i+1}/, NKF.nkf('-m0Z1 -w', match))
            end
          end
          res
        end
        return [tex, tdata]
      end
    end
  end

  return nil
end
391
+
392
# Turns "請求項N に係る発明" into "the invention(s) according to Claim(s) N"
# followed by the matching verb: ' are' when format_headers pluralized
# "invention", ' is' otherwise.
def format_invention_according_to(str)
  phrase = str.gsub('に係る発明', '').gsub('請求項', 'the invention according to 請求項')
  res = format_headers(phrase)
  verb = res =~ /inventions/ ? ' are' : ' is'
  res + verb
end
396
+
397
# Tells whether a line that looked like a header should be ignored.
#
# A line is a mistaken header when it contains 段, には or が, or when it
# contains one of 調査 / 先行技術文 / 注意 without 検討しましたが.
#
# NOTE(review): the original tested /段/ twice; the second test was probably
# a variant character lost to normalization - confirm against upstream.
#
# Returns true or false.
def mistaken_header?(tex)
  return true if [/段/, /には/, /が/].any? { |marker| marker =~ tex }

  survey_note = [/調査/, /先行技術文/, /注意/].any? { |marker| marker =~ tex }
  survey_note && (/検討しましたが/ !~ tex)
end
403
+
404
# Builds the WordML list of cited publications and stores it as
# :citation_list (empty content when R_CITATIONS_START does not match).
#
# Scanning starts just before the end of the R_CITATIONS_START match and
# stops at the first blank line or at a line starting with '---'. Each line
# is tried against every pattern of the citations file (loaded once into
# @cits). Matching lines are rendered with the CIT_* formats; lines without
# any match are copied over in half-width form through CIT_MISS.
def parse_citations
  citation_text = ''

  if m = @data.match(R_CITATIONS_START)
    @cits ||= YAML.load_file(CITATIONS_FILE)
    count = 0
    data = @data[m.end(0) - 2..-1].gsub(%r{</?[^>]+?>}, '') # end minus '1.', gsub to remove html

    catch :done_scanning do
      data.each_line do |line|
        tex = line
        throw :done_scanning if (/^\s*$/ =~ line) || (line[0..2].eql?('---'))

        # remembered so that "no pattern matched" can be detected below
        old_citation_text = citation_text
        # a line starting with a number opens the next citation
        if /^\p{Z}*\p{N}+((?:\.|.|:)+.*?)/m =~ tex
          count += 1
        end

        @cits.each do |_n, a|
          if m = tex.match(a['japanese'])
            if /United States/ =~ a['english']
              # citation is in English (no prime needed)
              citation_text += sprintf(CIT_SIMPLE, count, convert_pub_no(m, a['english']))
            else # normal
              if /Published Japanese Translation No./ =~ a['english']
                citation_text += sprintf(CIT_WITH_PRIME_PCT, count, convert_pub_no(m, a['english']), count)
              else
                citation_text += sprintf(CIT_WITH_PRIME, count, convert_pub_no(m, a['english']), count)
              end
            end
          end
        end # cits

        if old_citation_text == citation_text
          tex = NKF.nkf('-m0Z1 -w', tex)
          # strip blank dos lines
          tex.gsub!(/\p{Z}*\r\n/, '')
          tex = pad_spaces(tex)

          # if no match was found, just copy the japanese, skip first character (it's a period from the regex)
          # should have the correct number from the actual source (not from count variable)
          tex.encode!(:xml => :text) if tex
          citation_text += sprintf(CIT_MISS, tex)
        end
      end # each line
    end # catch
  end # if citations found

  set_prop(:citation_list, Sablon.content(:word_ml, citation_text))
end
454
+
455
# Tidies the spacing of a copied citation line, in place.
#
# tex - the String to clean up; it is modified and also returned.
#
# Steps: add a space after a period that is directly followed by a digit,
# add a space after every comma, remove the year kanji (年), reduce runs of
# space separators to a single space, and finally close up ". ," to ".,".
# Newlines and tabs are not space separators and are left alone.
def pad_spaces(tex)
  tex.gsub!(/\.(\p{N})/, '. \1')
  tex.gsub!(/\,/, ', ')
  tex.gsub!(/年/, '')
  # \p{Z}+ (the backslash was missing, so this step never did anything)
  tex.gsub!(/\p{Z}+/, ' ')
  # the period must be escaped, otherwise any character followed by " ," is
  # overwritten with a period
  tex.gsub!(/\. ,/, '.,')
  tex
end
464
+
465
# Fills the CIT_NO placeholder(s) of an English citation template with the
# publication number captured from the Japanese text.
#
# m   - MatchData of the Japanese citation pattern.
# eng - English template containing 'CIT_NO' (and 'CIT_NO2' for the
#       two-number form).
#
# The style of citation is told apart by m.length (captures + 1):
#   2    - a single number; US patents get Integer#commas and European
#          patents Integer#eurostyle (both defined outside this file)
#   3    - "first/second"
#   4, 5 - era prefix, year and serial (see convert_possible_heisei)
#   9    - two such numbers
# Any other length returns nil.
def convert_pub_no(m, eng)
  case m.length
  when 2
    if eng =~ /United States Patent No/
      eng.gsub('CIT_NO', NKF.nkf('-m0Z1 -w', m[1]).to_i.commas)
    elsif eng =~ /European Patent/
      eng.gsub('CIT_NO', NKF.nkf('-m0Z1 -w', m[1]).to_i.eurostyle)
    else
      eng.gsub('CIT_NO', NKF.nkf('-m0Z1 -w', m[1]))
    end
  when 3
    eng.gsub('CIT_NO', (NKF.nkf('-m0Z1 -w', m[1]) + '/' + NKF.nkf('-m0Z1 -w', m[2])))
  when 4, 5
    eng.gsub('CIT_NO', convert_possible_heisei(m[2], m[3], m[4]))
  when 9
    first_number = convert_possible_heisei(m[2], m[3], m[4])
    second_number = convert_possible_heisei(m[6], m[7], m[8])
    eng.gsub(/CIT_NO /, first_number + ' ').gsub('CIT_NO2', second_number)
  end
end
481
+
482
+ # matches /([昭|平]*)(\p{N}+?).(?:\p{Z}*)(\p{N}+?)号/
483
+ # convert 平09-060274 into H09-060274 or 2008-003749 into 2008-003748
484
# Formats a publication number that may carry a Japanese era prefix.
#
# hs    - era character: '平' gives an 'H' prefix, '昭' an 'S' prefix,
#         anything else no prefix.
# first - year part; zero-padded to two digits when an era prefix is used.
# last  - serial part.
#
# Both parts are converted to half-width by NKF, e.g. 平09-060274 becomes
# H09-060274 and 2008-003749 stays 2008-003749.
def convert_possible_heisei(hs, first, last)
  era_prefix = { '平' => 'H', '昭' => 'S' }[hs]
  year = NKF.nkf('-m0Z1 -w', first)
  year = era_prefix + sprintf('%02u', year.to_i(10)) if era_prefix

  year + '-' + NKF.nkf('-m0Z1 -w', last)
end
497
+
498
# Finds the law articles cited in the document and sets two WordML props:
#
#   :articles    - the "Cited Articles:" paragraph listing the short names
#                  of the articles found (unique, sorted)
#   :reasons_for - a paragraph with the English text of each reason,
#                  numbered unless there is only one
#
# The search is limited to the R_ARTICLE_SECTION match when there is one,
# and covers the whole document otherwise. Lines are examined in
# overlapping pairs so that a phrase broken across a line end is still
# found.
def parse_articles
  count = 1
  articles_text = '<w:p><w:pPr><w:kinsoku w:val="0"/><w:spacing w:line="360" w:lineRule="atLeast"/></w:pPr><w:r w:rsidR="006A661C"><w:rPr><w:b/><w:noProof/></w:rPr><w:t>Cited Articles:</w:t><w:tab/><w:tab/><w:tab/>'
  reasons_for_text = '<w:p><w:pPr><w:kinsoku w:val="0"/><w:spacing w:line="360" w:lineRule="atLeast"/></w:pPr><w:r w:rsidR="006A661C"><w:rPr><w:noProof/></w:rPr>'

  found_articles = []
  original_length = articles_text.length

  m = @data.match(R_ARTICLE_SECTION)
  target_data = m ? m[0] : @data

  # split once instead of on every iteration
  lines = target_data.lines

  # The upper bound is lines.length on purpose: the last window is empty,
  # exactly as in the previous while loop.
  (0..lines.length).each do |line_num|
    line = lines[line_num..line_num + 1].join.gsub(/\r\n|\r|\n/, '') # merge 2 lines and remove newline

    @reasons.each do |_r, a|
      next unless line =~ a['japanese']

      found_articles << a['short']

      # only add each English reason once
      next if reasons_for_text.include?(a['english'])

      reasons_for_text += "<w:t>#{count}.</w:t><w:tab/><w:t>#{a['english']}</w:t><w:br/><w:br/>"
      count += 1
    end
  end

  found_articles.uniq.sort.each do |a|
    # skip tab on first reason
    articles_text += '<w:tab/><w:tab/><w:tab/><w:tab/><w:tab/>' unless articles_text.length == original_length
    # only add short text once (36 shows up multiple times)
    articles_text += "<w:t>#{a}</w:t><w:br/>"
  end

  # Remove the number if only 1 article is listed. Plain strings are used
  # for both pattern and replacement: the old replacement '<w:tab\/><w:t>'
  # wrote literal backslashes into the WordML.
  reasons_for_text.gsub!('<w:t>1.</w:t><w:tab/><w:t>', '<w:tab/><w:t>') if count == 2

  # remove final word_ml newline
  reasons_for_text.gsub!(/<w:br\/><w:br\/>$/, '')
  articles_text.gsub!(/<w:br\/>$/, '')

  # close the paragraph
  articles_text += '</w:r></w:p>'
  reasons_for_text += '</w:r></w:p>'

  set_prop(:articles, Sablon.content(:word_ml, articles_text))
  set_prop(:reasons_for, Sablon.content(:word_ml, reasons_for_text))
end
554
+
555
# Renders the selected template with the collected props and writes the
# result to @outputfile.
#
# options - accepted and merged with the defaults, but not used by this
#           method.
def finish(options = {})
  options = { sablon: true }.merge(options)

  Sablon.template(@template).render_to_file(@outputfile, @props)
end
563
+
564
# Runs every parser over the loaded document, filling @props.
#
# options - Hash with:
#   :do_headers  - passed to parse_headers (default false)
#   :do_dashes   - passed to parse_mailing_date as the file name separator
#                  (default 0)
#   :do_examiner - passed to parse_examiner (default false)
#   :sablon      - accepted, not used by this method (default true)
#
# NOTE(review): with the default :do_dashes of 0 the output file name is
# joined with "0" rather than with parse_mailing_date's own default '-' -
# confirm this is intended.
def scan(options = {})
  options = { do_headers: false, do_dashes: 0, do_examiner: false, sablon: true }.merge(options)

  parse_mailing_date(options[:do_dashes])
  parse_examiner(options[:do_examiner])

  %i[
    parse_app_no
    parse_drafted
    parse_our_lawyer
    parse_response_period
    parse_see_list
    parse_final_oa
    parse_amendments_date
    parse_satei_previous_oa
    parse_articles
    parse_currently_known
    parse_citations
    parse_ipc
    parse_appeal_examiner
    parse_appeal_drafted
    parse_appeal_no
    parse_retroactive
    parse_note_to_applicant
    parse_shireisho_app
    parse_shireisho_code
  ].each { |step| send(step) }

  parse_headers(options[:do_headers])
end
596
+
597
+ private
598
+
599
# Strips leading and trailing whitespace from `t` and reduces every inner
# run of whitespace to one space, modifying `t` in place.
#
# Returns the result of the last substitution: `t` when an inner run was
# rewritten, nil when there was none.
def squish!(t)
  t.sub!(/\A[[:space:]]+/, '')
  t.sub!(/[[:space:]]+\z/, '')
  t.gsub!(/[[:space:]]+/, ' ')
end
604
+
605
# Translates one header line (claim list, citation list, reason, ...) into
# English.
#
# tex     - the Japanese header text; it is squished in place first.
# options - Hash passed on to replace_common_phrases
#           (:replace_toh, :ignore_toh) plus :markdown, which is not used
#           in this method.
#
# A line holding several groups (detected by R_HEADER_SEPARATOR_DETECT) is
# split on R_HEADER_SEPARATOR and each group is formatted on its own,
# joined by the half-width form of the detected separator. Otherwise the
# line is handled as a reason line (R_HEADER_REASONS) or as a plain header;
# text matching R_JPL_DETECT goes through handle_jpl.
#
# Returns the formatted String.
def format_headers(tex, options = {})
  defaults = { replace_toh: false,
               ignore_toh: true,
               markdown: false
  }
  options = defaults.merge(options)

  squish! tex

  # try to handle when Examiners put multiple groups separated by : or /
  # on the same line like 引用文献1:請求項1,2/ bla
  formatted_text = ''
  if R_HEADER_SEPARATOR_DETECT =~ tex
    # super fragile. If regex is changed
    # demarker = NKF.nkf('-m0Z1 -w', '#{$&[1,1]} ')
    demarker = NKF.nkf('-m0Z1 -w', "#{$1} ") # $1 is the first capture of the match above
    tex.split(R_HEADER_SEPARATOR).each do |section|
      formatted_text += demarker unless formatted_text.length == 0

      if section =~ R_JPL_DETECT
        formatted_text += handle_jpl(section)
      else
        # no jpl to handle
        formatted_text += format_headers(section, options)
      end

    end
  else
    if /#{R_HEADER_REASONS}/x =~ tex
      # handle special Reason lines
      if /及び|、/ =~ tex
        tex.split(/及び|、/).each do |section|
          section = format_number_listing(section)

          formatted_text += ' and ' unless formatted_text.length == 0
          formatted_text += "#{replace_common_phrases(section, options)}".gsub('(', ' (') # add space before parenthesis
        end
      else
        if tex =~ R_JPL_DETECT
          formatted_text += handle_jpl(tex)
        else
          # no jpl to handle
          formatted_text = "#{replace_common_phrases(tex, options)}"
        end
      end
      formatted_text.gsub!('Reason(', 'Reason (')
      formatted_text.gsub!(/(\p{N})\(/, '\1 (')
    else
      if tex =~ R_JPL_DETECT
        # note : it cant get here
        formatted_text += handle_jpl(tex)
      else
        # no jpl to handle
        formatted_text = "#{replace_common_phrases(tex, options)}"
      end
    end
  end

  formatted_text
end
665
+
666
# Translates the common words of a header and formats its number listing.
#
# tex     - the header text.
# options - Hash with:
#   :ignore_toh  - drop 等 (default true)
#   :replace_toh - turn 等 into ', etc.' (default false; has no effect when
#                  :ignore_toh already removed the character)
#
# Returns the result of format_number_listing on the translated text.
def replace_common_phrases(tex, options = {})
  options = { replace_toh: false, ignore_toh: true }.merge(options)

  tex = swap_words(NKF.nkf('-m0Z1 -w', tex))
  tex.gsub!('等', '') if options[:ignore_toh]
  tex.gsub!('等', ', etc.') if options[:replace_toh]

  # strip aberrant \r characters
  tex.gsub!("\r", '')

  format_number_listing(tex)
end
682
+
683
# Translates a header that quotes the Japanese Patent Law, for example
# "(A)理由1(特許法29条1項3号)".
#
# The text before 特許法 is translated like any other header; the article,
# paragraph and (optional) number follow as "Japanese Patent Law, Article A,
# Paragraph P, Number N". When no number was captured the dangling
# ", Number )" is reduced to ")".
def handle_jpl(tex)
  law_reference = /(.*)特許法第?(\p{N}+)条第?(\p{N}+)項(?:第?(\p{N}+.*)号)*/

  jpl = NKF.nkf('-m0Z1 -w', tex).gsub(law_reference) do
    lead, article, paragraph, number = Regexp.last_match.captures
    replace_common_phrases(format_number_listing(lead)) + "Japanese Patent Law, Article #{article}, Paragraph #{paragraph}, Number #{number}"
  end

  jpl.gsub!(/, Number\p{Z}+\)(?:について)?$/, ')') # if it doesnt have a \4
  jpl.gsub!(')R', ') R')
  jpl.gsub!('Reason(', 'Reason (')
  jpl.gsub!(/(\p{N})\(/, '\1 (')
  jpl
end
696
+
697
+ #do actual swapping of japanese and english words
698
# Does the actual swapping of Japanese and English words, in place.
#
# tex - the String to translate; it is modified and also returned.
#
# The substitutions are applied strictly in the order listed, which matters
# where one entry contains another (拡大先願 before 先願).
#
# NOTE(review): the ',' => ',' entry and the repeated '-' entry do nothing as
# written; they look like full-width variants lost to character
# normalization - confirm against the upstream gem.
def swap_words(tex)
  [
    ['、', ','],
    [',', ','],
    ['拡大先願', 'Expansion of Application'],
    ['ないし', 'to'],
    ['について', ''],
    ['のいずれか', 'any one of'],
    ['及び', ','],
    ['および', ','],
    ['進歩性', 'Inventive Step'],
    ['実施可能要件', 'Enablement Requirements'],
    ['産業上の利用可能性', 'Industrial Applicability'],
    ['発明の単一性', 'Unity of Invention'],
    ['明確性', 'Clarity'],
    ['サポート要件', 'Support Requirements'],
    ['新規性', 'Novelty'],
    [/請\p{Z}*求\p{Z}*項/, 'Claim'],
    ['引用文献', 'Citation'],
    [/引\p{Z}*用\p{Z}*例/, 'Citation'],
    [/実\p{Z}*施\p{Z}*例/, 'Embodiment'],
    [/理\p{Z}*由/, 'Reason'],
    [/先\p{Z}*願/, 'Prior Application'],
    ['-', 'to'],
    ['-', 'to'],
    ['~', 'to'],
    ['乃至', 'to'],
    ['理由', 'Reason'],
    # match 備考:
    ['備考', 'Notes']
  ].each { |japanese, english| tex.gsub!(japanese, english) }

  tex
end
731
+
732
+ # formats a number listing assuming only one list in the string
733
+ # one level up, format_headers breaks single lines into a plurality of these
734
+ # ex: 請求項3,17,31,45
735
# Formats a number listing, assuming only one list in the string.
# One level up, format_headers breaks single lines into a plurality of
# these, e.g. 請求項3,17,31,45.
#
# The text is split into an opening (everything before the first digit that
# follows the first three characters), the number list, and a closing
# (everything after the last digit). 'and' is inserted before the last
# number, a range of two consecutive numbers ("1to2") is written as two
# numbers, and the nouns are pluralized when more than one number is listed.
# Text without such a digit is returned unchanged apart from the half-width
# conversion and the final '( ' fix-up.
def format_number_listing(tex)
  tex = NKF.nkf('-m0Z1 -w', tex)

  # if no numbers (like 'Notes:') then do nothing
  if m = tex.match(/(?:...)(.*?)\p{N}/) # skip the first three characters in case it's something like '1.理由1,2について'
    # opening, numbers, close
    op = tex[0..m.end(1) - 1]
    num_start = m.end(1)
    # last digit that has no other digit after it on the same line
    m = tex.match(/\p{N}(?!.*\p{N})/)
    cl = tex[m.end(0)..-1]
    nums = tex[num_start..m.end(0) - 1]

    # the capture group makes split keep the number tokens themselves
    parsed = nums.split(/((?:~|-)*\p{N}+(?:to\p{N}+)*,*)/).reject(&:empty?)

    # change ['1to2,', '3'] to ['1', '2', '3']
    parsed.each_index do |el|
      if /to\p{N}/ =~ parsed[el]
        parts = parsed[el].split(/to/)
        # only ranges of two consecutive numbers are expanded
        if parts[0].to_i(10) == (parts[1].to_i(10) - 1)
          parsed[el] = parts[0] + ','
          parsed.insert(el + 1, parts[1])
        end
      end
    end

    if parsed.length > 1
      parsed.insert(-2, 'and')
      # "1, and 2" reads better as "1 and 2"
      parsed[0].gsub!(',', '') if parsed.length == 3
    end

    tex = "#{op} #{parsed.join(' ')}#{cl}"

    # pluralize when several numbers or a range are listed
    if (parsed.length > 2) || (/\p{N}to\p{N}/ =~ tex)
      tex.gsub!('Claim', 'Claims')
      tex.gsub!('Citation', 'Citations')
      tex.gsub!('Embodiment', 'Embodiments')
      tex.gsub!('Reason', 'Reasons')
      tex.gsub!('invention', 'inventions')
      tex.gsub!('Prior Application', 'Prior Applications')
    end
    tex.gsub!('to', ' to ')

    # remove extra spaces
    tex.gsub!(/\p{Z}+/, ' ')
  end

  # workaround for a stray space after an opening parenthesis whose cause
  # was never tracked down
  tex.gsub!('( ', ' (')

  tex
end
786
+
787
+ # the @props hash is passed to docx_templater gem
788
# Stores `value` under `prop` in the @props hash, which is what finish hands
# to the template renderer. Returns `value`.
def set_prop(prop, value)
  @props.store(prop, value)
end
791
+
792
# Creates the @props and @scrapes hashes and fills in the values that do not
# need a dedicated parser: the mailing number, the reference number, the
# "fields searched" marker, and empty defaults for the citation list and the
# IPC reference text.
def init_instance_vars
  @props = {}
  @scrapes = {}

  # parse_citations stores its result under :citation_list, so that is the
  # key that needs the empty default. The misspelled :citaton_list key is
  # still written as well, in case something outside this file reads it
  # through the public props reader.
  @props[:citation_list] = ''
  @props[:citaton_list] = ''

  capture_the(:mailing_no, /発送番号\p{Z}+(\S+)/)
  capture_the(:ref_no, /整理番号\p{Z}+(\S+)/)
  capture_the(:ipc_list, /調査した分野$/)
  set_prop(:ipc_reference_text, '')
end
801
+
802
# Reads the source file into @data and converts it to UTF-8, using the
# encoding detected by CharlockHolmes (the comment in the original notes
# that it is usually SHIFT_JIS).
#
# Raises RuntimeError with the message 'oa_templater_exception' when the
# file cannot be read or the conversion fails.
def read_oa_data
  # read in OA data
  @data = File.read(@sourcefile)

  # convert detected encoding to UTF-8
  detection = CharlockHolmes::EncodingDetector.detect(@data)
  @data = CharlockHolmes::Converter.convert @data, detection[:encoding], 'UTF-8'
rescue
  raise 'oa_templater_exception'
end
818
+
819
# Matches `reg` against the document and records the outcome.
#
# prop   - key used in both @scrapes and @props.
# reg    - pattern to look for.
# offset - position in @data at which the search starts (default 0).
#
# @scrapes[prop] receives the MatchData (nil when there is no match) and
# @props[prop] the first capture ('' when there is no match; nil when the
# pattern matched but has no first capture).
def capture_the(prop, reg, offset = 0)
  found = @data.match(reg, offset)
  @scrapes[prop] = found
  @props[prop] = found.nil? ? '' : found[1]
end
824
+
825
# Formats a scraped Japanese date.
#
# format - sprintf format taking year, month and day, in that order.
# date   - MatchData whose captures 1..3 are the Heisei year, the month and
#          the day (possibly in full-width digits), or nil.
#
# The Western year is the era year plus 1988. Returns '' when date is nil.
def format_date(format, date)
  return '' if date.nil?

  # convert from full-width characters to normal ascii
  era_year, month, day = (1..3).map { |index| NKF.nkf('-m0Z1 -w', date[index]).to_i }
  sprintf(format, (era_year + 1988).to_s, month.to_s, day.to_s)
end
834
+
835
# Chooses the template file (@template) and its Japanese name
# (@template_name) from the kind of document that was loaded.
#
# The appeal-board notice is recognized by the sequence of labels in its
# body, every other kind by its <TITLE>. The first matching rule wins, and
# anything unrecognized is treated as a notice of reasons for rejection.
def pick_template
  rules = [
    [/審判請求の番.*不服.*特許出願の番号.*特願.*起案日.*審判長.*代理人弁理士/m, :shinpankyozetsuriyu, '審判拒絶理由'],
    [/<TITLE>拒絶理由通知書<\/TITLE>/i, :kyozetsuriyu, '拒絶理由'],
    [/<TITLE>補正の却下の決定<\/TITLE>/i, :rejectamendments, '補正の却下の決定'],
    [/<TITLE>拒絶査定<\/TITLE>/i, :kyozetsusatei, '拒絶査定'],
    # The parentheses are part of the title. They are matched literally, in
    # half- or full-width form; previously they formed a capture group, so a
    # title containing parentheses never matched.
    [/<TITLE>審尋[((]審判官[))]<\/TITLE>/i, :shinnen, '審尋'],
    [/<TITLE>同一出願人による同日出願通知書<\/TITLE>/i, :shireisho, '指令書']
  ]

  _pattern, key, name = rules.find { |pattern, _key, _name| pattern =~ @data }
  # not satei or riyu, default to riyu
  key ||= :kyozetsuriyu
  name ||= '拒絶理由'

  @template = @templates[key]
  @template_name = name
end
861
+ end
862
+ end