embulk-filter-icu4j 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/ICU_LICENSE.txt +330 -0
- data/LICENSE.txt +21 -0
- data/README.md +58 -0
- data/build.gradle +77 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +164 -0
- data/gradlew.bat +90 -0
- data/lib/embulk/filter/icu4j.rb +3 -0
- data/src/main/java/org/embulk/filter/icu4j/Icu4jFilterPlugin.java +165 -0
- data/src/test/java/org/embulk/filter/icu4j/TestIcu4jFilterPlugin.java +5 -0
- metadata +86 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 1f2c54a916cde80da7581afdbbea0e1bbc0175a8
|
4
|
+
data.tar.gz: bc2b45d7f9fc98642a1e77bdde256847f22383c8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 6b4bc3099d6a77955bccddbbb805951fcebcaacf1f62bda9b13a35c758ef44d68161ce208c82a639aaa07044bebbf7cb64994826c69aafd944f1ec3ca1854acf
|
7
|
+
data.tar.gz: 7c10659f1f85f3bd4be6ede17e931fa66d067ae427d80dd172ccc4bf6fdd194f66c902a588100cb6625dfa14fd1d5ebdab4676db0dac0ee11a53db8af22abc94
|
data/.gitignore
ADDED
data/ICU_LICENSE.txt
ADDED
@@ -0,0 +1,330 @@
|
|
1
|
+
ICU License - ICU 1.8.1 and later
|
2
|
+
|
3
|
+
COPYRIGHT AND PERMISSION NOTICE
|
4
|
+
|
5
|
+
Copyright (c) 1995-2015 International Business Machines Corporation and others
|
6
|
+
|
7
|
+
All rights reserved.
|
8
|
+
|
9
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, provided that the above copyright notice(s) and this permission notice appear in all copies of the Software and that both the above copyright notice(s) and this permission notice appear in supporting documentation.
|
10
|
+
|
11
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
12
|
+
|
13
|
+
Except as contained in this notice, the name of a copyright holder shall not be used in advertising or otherwise to promote the sale, use or other dealings in this Software without prior written authorization of the copyright holder.
|
14
|
+
|
15
|
+
All trademarks and registered trademarks mentioned herein are the property of their respective owners.
|
16
|
+
|
17
|
+
Third-Party Software Licenses
|
18
|
+
|
19
|
+
This section contains third-party software notices and/or additional terms for licensed third-party software components included within ICU libraries.
|
20
|
+
1. Unicode Data Files and Software
|
21
|
+
|
22
|
+
COPYRIGHT AND PERMISSION NOTICE
|
23
|
+
|
24
|
+
Copyright © 1991-2015 Unicode, Inc. All rights reserved.
|
25
|
+
Distributed under the Terms of Use in
|
26
|
+
http://www.unicode.org/copyright.html.
|
27
|
+
|
28
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
29
|
+
a copy of the Unicode data files and any associated documentation
|
30
|
+
(the "Data Files") or Unicode software and any associated documentation
|
31
|
+
(the "Software") to deal in the Data Files or Software
|
32
|
+
without restriction, including without limitation the rights to use,
|
33
|
+
copy, modify, merge, publish, distribute, and/or sell copies of
|
34
|
+
the Data Files or Software, and to permit persons to whom the Data Files
|
35
|
+
or Software are furnished to do so, provided that
|
36
|
+
(a) this copyright and permission notice appear with all copies
|
37
|
+
of the Data Files or Software,
|
38
|
+
(b) this copyright and permission notice appear in associated
|
39
|
+
documentation, and
|
40
|
+
(c) there is clear notice in each modified Data File or in the Software
|
41
|
+
as well as in the documentation associated with the Data File(s) or
|
42
|
+
Software that the data or software has been modified.
|
43
|
+
|
44
|
+
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
|
45
|
+
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
|
46
|
+
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
47
|
+
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
|
48
|
+
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
|
49
|
+
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
|
50
|
+
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
|
51
|
+
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
52
|
+
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
53
|
+
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
|
54
|
+
|
55
|
+
Except as contained in this notice, the name of a copyright holder
|
56
|
+
shall not be used in advertising or otherwise to promote the sale,
|
57
|
+
use or other dealings in these Data Files or Software without prior
|
58
|
+
written authorization of the copyright holder.
|
59
|
+
2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt)
|
60
|
+
|
61
|
+
# The Google Chrome software developed by Google is licensed under the BSD license. Other software included in this distribution is provided under other licenses, as set forth below.
|
62
|
+
#
|
63
|
+
# The BSD License
|
64
|
+
# http://opensource.org/licenses/bsd-license.php
|
65
|
+
# Copyright (C) 2006-2008, Google Inc.
|
66
|
+
#
|
67
|
+
# All rights reserved.
|
68
|
+
#
|
69
|
+
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
70
|
+
#
|
71
|
+
# Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
72
|
+
# Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
73
|
+
# Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
|
74
|
+
#
|
75
|
+
#
|
76
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
77
|
+
#
|
78
|
+
#
|
79
|
+
# The word list in cjdict.txt are generated by combining three word lists listed
|
80
|
+
# below with further processing for compound word breaking. The frequency is generated
|
81
|
+
# with an iterative training against Google web corpora.
|
82
|
+
#
|
83
|
+
# * Libtabe (Chinese)
|
84
|
+
# - https://sourceforge.net/project/?group_id=1519
|
85
|
+
# - Its license terms and conditions are shown below.
|
86
|
+
#
|
87
|
+
# * IPADIC (Japanese)
|
88
|
+
# - http://chasen.aist-nara.ac.jp/chasen/distribution.html
|
89
|
+
# - Its license terms and conditions are shown below.
|
90
|
+
#
|
91
|
+
# ---------COPYING.libtabe ---- BEGIN--------------------
|
92
|
+
#
|
93
|
+
# /*
|
94
|
+
# * Copyrighy (c) 1999 TaBE Project.
|
95
|
+
# * Copyright (c) 1999 Pai-Hsiang Hsiao.
|
96
|
+
# * All rights reserved.
|
97
|
+
# *
|
98
|
+
# * Redistribution and use in source and binary forms, with or without
|
99
|
+
# * modification, are permitted provided that the following conditions
|
100
|
+
# * are met:
|
101
|
+
# *
|
102
|
+
# * . Redistributions of source code must retain the above copyright
|
103
|
+
# * notice, this list of conditions and the following disclaimer.
|
104
|
+
# * . Redistributions in binary form must reproduce the above copyright
|
105
|
+
# * notice, this list of conditions and the following disclaimer in
|
106
|
+
# * the documentation and/or other materials provided with the
|
107
|
+
# * distribution.
|
108
|
+
# * . Neither the name of the TaBE Project nor the names of its
|
109
|
+
# * contributors may be used to endorse or promote products derived
|
110
|
+
# * from this software without specific prior written permission.
|
111
|
+
# *
|
112
|
+
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
113
|
+
# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
114
|
+
# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
115
|
+
# * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
116
|
+
# * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
117
|
+
# * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
118
|
+
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
119
|
+
# * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
120
|
+
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
121
|
+
# * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
122
|
+
# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
123
|
+
# * OF THE POSSIBILITY OF SUCH DAMAGE.
|
124
|
+
# */
|
125
|
+
#
|
126
|
+
# /*
|
127
|
+
# * Copyright (c) 1999 Computer Systems and Communication Lab,
|
128
|
+
# * Institute of Information Science, Academia Sinica.
|
129
|
+
# * All rights reserved.
|
130
|
+
# *
|
131
|
+
# * Redistribution and use in source and binary forms, with or without
|
132
|
+
# * modification, are permitted provided that the following conditions
|
133
|
+
# * are met:
|
134
|
+
# *
|
135
|
+
# * . Redistributions of source code must retain the above copyright
|
136
|
+
# * notice, this list of conditions and the following disclaimer.
|
137
|
+
# * . Redistributions in binary form must reproduce the above copyright
|
138
|
+
# * notice, this list of conditions and the following disclaimer in
|
139
|
+
# * the documentation and/or other materials provided with the
|
140
|
+
# * distribution.
|
141
|
+
# * . Neither the name of the Computer Systems and Communication Lab
|
142
|
+
# * nor the names of its contributors may be used to endorse or
|
143
|
+
# * promote products derived from this software without specific
|
144
|
+
# * prior written permission.
|
145
|
+
# *
|
146
|
+
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
147
|
+
# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
148
|
+
# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
149
|
+
# * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
150
|
+
# * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
151
|
+
# * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
152
|
+
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
153
|
+
# * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
154
|
+
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
155
|
+
# * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
156
|
+
# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
157
|
+
# * OF THE POSSIBILITY OF SUCH DAMAGE.
|
158
|
+
# */
|
159
|
+
#
|
160
|
+
# Copyright 1996 Chih-Hao Tsai @ Beckman Institute, University of Illinois
|
161
|
+
# c-tsai4@uiuc.edu http://casper.beckman.uiuc.edu/~c-tsai4
|
162
|
+
#
|
163
|
+
# ---------------COPYING.libtabe-----END------------------------------------
|
164
|
+
#
|
165
|
+
#
|
166
|
+
# ---------------COPYING.ipadic-----BEGIN------------------------------------
|
167
|
+
#
|
168
|
+
# Copyright 2000, 2001, 2002, 2003 Nara Institute of Science
|
169
|
+
# and Technology. All Rights Reserved.
|
170
|
+
#
|
171
|
+
# Use, reproduction, and distribution of this software is permitted.
|
172
|
+
# Any copy of this software, whether in its original form or modified,
|
173
|
+
# must include both the above copyright notice and the following
|
174
|
+
# paragraphs.
|
175
|
+
#
|
176
|
+
# Nara Institute of Science and Technology (NAIST),
|
177
|
+
# the copyright holders, disclaims all warranties with regard to this
|
178
|
+
# software, including all implied warranties of merchantability and
|
179
|
+
# fitness, in no event shall NAIST be liable for
|
180
|
+
# any special, indirect or consequential damages or any damages
|
181
|
+
# whatsoever resulting from loss of use, data or profits, whether in an
|
182
|
+
# action of contract, negligence or other tortuous action, arising out
|
183
|
+
# of or in connection with the use or performance of this software.
|
184
|
+
#
|
185
|
+
# A large portion of the dictionary entries
|
186
|
+
# originate from ICOT Free Software. The following conditions for ICOT
|
187
|
+
# Free Software applies to the current dictionary as well.
|
188
|
+
#
|
189
|
+
# Each User may also freely distribute the Program, whether in its
|
190
|
+
# original form or modified, to any third party or parties, PROVIDED
|
191
|
+
# that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
|
192
|
+
# on, or be attached to, the Program, which is distributed substantially
|
193
|
+
# in the same form as set out herein and that such intended
|
194
|
+
# distribution, if actually made, will neither violate or otherwise
|
195
|
+
# contravene any of the laws and regulations of the countries having
|
196
|
+
# jurisdiction over the User or the intended distribution itself.
|
197
|
+
#
|
198
|
+
# NO WARRANTY
|
199
|
+
#
|
200
|
+
# The program was produced on an experimental basis in the course of the
|
201
|
+
# research and development conducted during the project and is provided
|
202
|
+
# to users as so produced on an experimental basis. Accordingly, the
|
203
|
+
# program is provided without any warranty whatsoever, whether express,
|
204
|
+
# implied, statutory or otherwise. The term "warranty" used herein
|
205
|
+
# includes, but is not limited to, any warranty of the quality,
|
206
|
+
# performance, merchantability and fitness for a particular purpose of
|
207
|
+
# the program and the nonexistence of any infringement or violation of
|
208
|
+
# any right of any third party.
|
209
|
+
#
|
210
|
+
# Each user of the program will agree and understand, and be deemed to
|
211
|
+
# have agreed and understood, that there is no warranty whatsoever for
|
212
|
+
# the program and, accordingly, the entire risk arising from or
|
213
|
+
# otherwise connected with the program is assumed by the user.
|
214
|
+
#
|
215
|
+
# Therefore, neither ICOT, the copyright holder, or any other
|
216
|
+
# organization that participated in or was otherwise related to the
|
217
|
+
# development of the program and their respective officials, directors,
|
218
|
+
# officers and other employees shall be held liable for any and all
|
219
|
+
# damages, including, without limitation, general, special, incidental
|
220
|
+
# and consequential damages, arising out of or otherwise in connection
|
221
|
+
# with the use or inability to use the program or any product, material
|
222
|
+
# or result produced or otherwise obtained by using the program,
|
223
|
+
# regardless of whether they have been advised of, or otherwise had
|
224
|
+
# knowledge of, the possibility of such damages at any time during the
|
225
|
+
# project or thereafter. Each user will be deemed to have agreed to the
|
226
|
+
# foregoing by his or her commencement of use of the program. The term
|
227
|
+
# "use" as used herein includes, but is not limited to, the use,
|
228
|
+
# modification, copying and distribution of the program and the
|
229
|
+
# production of secondary products from the program.
|
230
|
+
#
|
231
|
+
# In the case where the program, whether in its original form or
|
232
|
+
# modified, was distributed or delivered to or received by a user from
|
233
|
+
# any person, organization or entity other than ICOT, unless it makes or
|
234
|
+
# grants independently of ICOT any specific warranty to the user in
|
235
|
+
# writing, such person, organization or entity, will also be exempted
|
236
|
+
# from and not be held liable to the user for any such damages as noted
|
237
|
+
# above as far as the program is concerned.
|
238
|
+
#
|
239
|
+
# ---------------COPYING.ipadic-----END------------------------------------
|
240
|
+
3. Lao Word Break Dictionary Data (laodict.txt)
|
241
|
+
|
242
|
+
# Copyright (c) 2013 International Business Machines Corporation
|
243
|
+
# and others. All Rights Reserved.
|
244
|
+
#
|
245
|
+
# Project: http://code.google.com/p/lao-dictionary/
|
246
|
+
# Dictionary: http://lao-dictionary.googlecode.com/git/Lao-Dictionary.txt
|
247
|
+
# License: http://lao-dictionary.googlecode.com/git/Lao-Dictionary-LICENSE.txt
|
248
|
+
# (copied below)
|
249
|
+
#
|
250
|
+
# This file is derived from the above dictionary, with slight modifications.
|
251
|
+
# --------------------------------------------------------------------------------
|
252
|
+
# Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell.
|
253
|
+
# All rights reserved.
|
254
|
+
#
|
255
|
+
# Redistribution and use in source and binary forms, with or without modification,
|
256
|
+
# are permitted provided that the following conditions are met:
|
257
|
+
#
|
258
|
+
# Redistributions of source code must retain the above copyright notice, this
|
259
|
+
# list of conditions and the following disclaimer. Redistributions in binary
|
260
|
+
# form must reproduce the above copyright notice, this list of conditions and
|
261
|
+
# the following disclaimer in the documentation and/or other materials
|
262
|
+
# provided with the distribution.
|
263
|
+
#
|
264
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
265
|
+
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
266
|
+
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
267
|
+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
268
|
+
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
269
|
+
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
270
|
+
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
271
|
+
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
272
|
+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
273
|
+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
274
|
+
# --------------------------------------------------------------------------------
|
275
|
+
4. Burmese Word Break Dictionary Data (burmesedict.txt)
|
276
|
+
|
277
|
+
# Copyright (c) 2014 International Business Machines Corporation
|
278
|
+
# and others. All Rights Reserved.
|
279
|
+
#
|
280
|
+
# This list is part of a project hosted at:
|
281
|
+
# github.com/kanyawtech/myanmar-karen-word-lists
|
282
|
+
#
|
283
|
+
# --------------------------------------------------------------------------------
|
284
|
+
# Copyright (c) 2013, LeRoy Benjamin Sharon
|
285
|
+
# All rights reserved.
|
286
|
+
#
|
287
|
+
# Redistribution and use in source and binary forms, with or without modification,
|
288
|
+
# are permitted provided that the following conditions are met:
|
289
|
+
#
|
290
|
+
# Redistributions of source code must retain the above copyright notice, this
|
291
|
+
# list of conditions and the following disclaimer.
|
292
|
+
#
|
293
|
+
# Redistributions in binary form must reproduce the above copyright notice, this
|
294
|
+
# list of conditions and the following disclaimer in the documentation and/or
|
295
|
+
# other materials provided with the distribution.
|
296
|
+
#
|
297
|
+
# Neither the name Myanmar Karen Word Lists, nor the names of its
|
298
|
+
# contributors may be used to endorse or promote products derived from
|
299
|
+
# this software without specific prior written permission.
|
300
|
+
#
|
301
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
302
|
+
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
303
|
+
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
304
|
+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
305
|
+
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
306
|
+
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
307
|
+
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
308
|
+
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
309
|
+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
310
|
+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
311
|
+
# --------------------------------------------------------------------------------
|
312
|
+
5. Time Zone Database
|
313
|
+
|
314
|
+
ICU uses the public domain data and code derived from Time Zone Database for its time zone support. The ownership of the TZ database is explained in BCP 175: Procedure for Maintaining the Time Zone Database section 7.
|
315
|
+
|
316
|
+
7. Database Ownership
|
317
|
+
|
318
|
+
The TZ database itself is not an IETF Contribution or an IETF
|
319
|
+
document. Rather it is a pre-existing and regularly updated work
|
320
|
+
that is in the public domain, and is intended to remain in the public
|
321
|
+
domain. Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do not apply
|
322
|
+
to the TZ Database or contributions that individuals make to it.
|
323
|
+
Should any claims be made and substantiated against the TZ Database,
|
324
|
+
the organization that is providing the IANA Considerations defined in
|
325
|
+
this RFC, under the memorandum of understanding with the IETF,
|
326
|
+
currently ICANN, may act in accordance with all competent court
|
327
|
+
orders. No ownership claims will be made by ICANN or the IETF Trust
|
328
|
+
on the database or the code. Any person making a contribution to the
|
329
|
+
database or code waives all rights to future claims in that
|
330
|
+
contribution or in the TZ Database.
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
|
2
|
+
MIT License
|
3
|
+
|
4
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
a copy of this software and associated documentation files (the
|
6
|
+
"Software"), to deal in the Software without restriction, including
|
7
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# Icu4j filter plugin for Embulk
|
2
|
+
|
3
|
+
Icu4j filter plugin for Embulk.
|
4
|
+
See http://site.icu-project.org/
|
5
|
+
|
6
|
+
## Overview
|
7
|
+
|
8
|
+
* **Plugin type**: filter
|
9
|
+
|
10
|
+
## Configuration
|
11
|
+
|
12
|
+
- **key_names**: target key names. (list, required)
|
13
|
+
- **keep_input**: keep input columns. (bool, default: `true`)
|
14
|
+
- **settings**: settings. (list, required)
|
15
|
+
- **suffix**: output column name suffix. If null, the input column is overwritten. (string, default: null)
|
16
|
+
- **transliterators**: transliterator IDs (comma-separated). See http://hondou.homedns.org/pukiwiki/pukiwiki.php?Java%20ICU4J. (string)
|
17
|
+
- **case**: upper or lower (string, default: null)
|
18
|
+
|
19
|
+
## Example
|
20
|
+
|
21
|
+
```yaml
|
22
|
+
filters:
|
23
|
+
- type: icu4j
|
24
|
+
keep_input: false
|
25
|
+
key_names:
|
26
|
+
- catchcopy
|
27
|
+
settings:
|
28
|
+
- { suffix: _katakana, transliterators: 'Katakana-Hiragana,Fullwidth-Halfwidth', case: upper }
|
29
|
+
- { transliterators: 'Katakana-Hiragana', case: lower }
|
30
|
+
- { suffix: _romaji_lower, transliterators: 'Katakana-Hiragana,Hiragana-Latin', case: lower }
|
31
|
+
```
|
32
|
+
|
33
|
+
### input
|
34
|
+
|
35
|
+
```json
|
36
|
+
{
|
37
|
+
"catchcopy" : "ホゲホゲ"
|
38
|
+
}
|
39
|
+
```
|
40
|
+
|
41
|
+
The output is as below:
|
42
|
+
|
43
|
+
```json
|
44
|
+
{
|
45
|
+
"catchcopy" : "ほげほげ",
|
46
|
+
"catchcopy_katakana" : "ホゲホゲ",
|
47
|
+
"catchcopy_romaji_lower" : "hogehoge"
|
48
|
+
}
|
49
|
+
```
|
50
|
+
|
51
|
+
## transliterator rules
|
52
|
+
See http://hondou.homedns.org/pukiwiki/pukiwiki.php?Java%20ICU4J
|
53
|
+
|
54
|
+
## Build
|
55
|
+
|
56
|
+
```
|
57
|
+
$ ./gradlew gem # -t to watch change of files and rebuild continuously
|
58
|
+
```
|
data/build.gradle
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
plugins {
|
2
|
+
id "com.jfrog.bintray" version "1.1"
|
3
|
+
id "com.github.jruby-gradle.base" version "0.1.5"
|
4
|
+
id "java"
|
5
|
+
id "eclipse"
|
6
|
+
}
|
7
|
+
import com.github.jrubygradle.JRubyExec
|
8
|
+
repositories {
|
9
|
+
mavenCentral()
|
10
|
+
jcenter()
|
11
|
+
}
|
12
|
+
configurations {
|
13
|
+
provided
|
14
|
+
}
|
15
|
+
|
16
|
+
version = "0.1.0"
|
17
|
+
|
18
|
+
sourceCompatibility = 1.7
|
19
|
+
targetCompatibility = 1.7
|
20
|
+
|
21
|
+
dependencies {
|
22
|
+
compile "org.embulk:embulk-core:0.7.4"
|
23
|
+
provided "org.embulk:embulk-core:0.7.4"
|
24
|
+
compile 'icu:icu4j:2.6.1'
|
25
|
+
testCompile "junit:junit:4.+"
|
26
|
+
}
|
27
|
+
|
28
|
+
task classpath(type: Copy, dependsOn: ["jar"]) {
|
29
|
+
doFirst { file("classpath").deleteDir() }
|
30
|
+
from (configurations.runtime - configurations.provided + files(jar.archivePath))
|
31
|
+
into "classpath"
|
32
|
+
}
|
33
|
+
clean { delete "classpath" }
|
34
|
+
|
35
|
+
task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
|
36
|
+
jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
|
37
|
+
script "${project.name}.gemspec"
|
38
|
+
doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
|
39
|
+
}
|
40
|
+
|
41
|
+
task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
|
42
|
+
jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
|
43
|
+
script "pkg/${project.name}-${project.version}.gem"
|
44
|
+
}
|
45
|
+
|
46
|
+
task "package"(dependsOn: ["gemspec", "classpath"]) << {
|
47
|
+
println "> Build succeeded."
|
48
|
+
println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
|
49
|
+
}
|
50
|
+
|
51
|
+
task gemspec {
|
52
|
+
ext.gemspecFile = file("${project.name}.gemspec")
|
53
|
+
inputs.file "build.gradle"
|
54
|
+
outputs.file gemspecFile
|
55
|
+
doLast { gemspecFile.write($/
|
56
|
+
Gem::Specification.new do |spec|
|
57
|
+
spec.name = "${project.name}"
|
58
|
+
spec.version = "${project.version}"
|
59
|
+
spec.authors = ["toyama0919"]
|
60
|
+
spec.summary = %[Icu4j filter plugin for Embulk]
|
61
|
+
spec.description = %[Icu4j filter plugin for Embulk. see http://site.icu-project.org/]
|
62
|
+
spec.email = ["toyama0919@gmail.com"]
|
63
|
+
spec.licenses = ["MIT"]
|
64
|
+
spec.homepage = "https://github.com/toyama0919/embulk-filter-icu4j"
|
65
|
+
|
66
|
+
spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
|
67
|
+
spec.test_files = spec.files.grep(%r"^(test|spec)/")
|
68
|
+
spec.require_paths = ["lib"]
|
69
|
+
|
70
|
+
#spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
|
71
|
+
spec.add_development_dependency 'bundler', ['~> 1.0']
|
72
|
+
spec.add_development_dependency 'rake', ['>= 10.0']
|
73
|
+
end
|
74
|
+
/$)
|
75
|
+
}
|
76
|
+
}
|
77
|
+
clean { delete "${project.name}.gemspec" }
|
Binary file
|
data/gradlew
ADDED
@@ -0,0 +1,164 @@
|
|
1
|
+
#!/usr/bin/env bash
|
2
|
+
|
3
|
+
##############################################################################
|
4
|
+
##
|
5
|
+
## Gradle start up script for UN*X
|
6
|
+
##
|
7
|
+
##############################################################################
|
8
|
+
|
9
|
+
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
10
|
+
DEFAULT_JVM_OPTS=""
|
11
|
+
|
12
|
+
APP_NAME="Gradle"
|
13
|
+
APP_BASE_NAME=`basename "$0"`
|
14
|
+
|
15
|
+
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
16
|
+
MAX_FD="maximum"
|
17
|
+
|
18
|
+
warn ( ) {
|
19
|
+
echo "$*"
|
20
|
+
}
|
21
|
+
|
22
|
+
die ( ) {
|
23
|
+
echo
|
24
|
+
echo "$*"
|
25
|
+
echo
|
26
|
+
exit 1
|
27
|
+
}
|
28
|
+
|
29
|
+
# OS specific support (must be 'true' or 'false').
|
30
|
+
cygwin=false
|
31
|
+
msys=false
|
32
|
+
darwin=false
|
33
|
+
case "`uname`" in
|
34
|
+
CYGWIN* )
|
35
|
+
cygwin=true
|
36
|
+
;;
|
37
|
+
Darwin* )
|
38
|
+
darwin=true
|
39
|
+
;;
|
40
|
+
MINGW* )
|
41
|
+
msys=true
|
42
|
+
;;
|
43
|
+
esac
|
44
|
+
|
45
|
+
# For Cygwin, ensure paths are in UNIX format before anything is touched.
|
46
|
+
if $cygwin ; then
|
47
|
+
[ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
|
48
|
+
fi
|
49
|
+
|
50
|
+
# Attempt to set APP_HOME
|
51
|
+
# Resolve links: $0 may be a link
|
52
|
+
PRG="$0"
|
53
|
+
# Need this for relative symlinks.
|
54
|
+
while [ -h "$PRG" ] ; do
|
55
|
+
ls=`ls -ld "$PRG"`
|
56
|
+
link=`expr "$ls" : '.*-> \(.*\)$'`
|
57
|
+
if expr "$link" : '/.*' > /dev/null; then
|
58
|
+
PRG="$link"
|
59
|
+
else
|
60
|
+
PRG=`dirname "$PRG"`"/$link"
|
61
|
+
fi
|
62
|
+
done
|
63
|
+
SAVED="`pwd`"
|
64
|
+
cd "`dirname \"$PRG\"`/" >&-
|
65
|
+
APP_HOME="`pwd -P`"
|
66
|
+
cd "$SAVED" >&-
|
67
|
+
|
68
|
+
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
69
|
+
|
70
|
+
# Determine the Java command to use to start the JVM.
|
71
|
+
if [ -n "$JAVA_HOME" ] ; then
|
72
|
+
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
|
73
|
+
# IBM's JDK on AIX uses strange locations for the executables
|
74
|
+
JAVACMD="$JAVA_HOME/jre/sh/java"
|
75
|
+
else
|
76
|
+
JAVACMD="$JAVA_HOME/bin/java"
|
77
|
+
fi
|
78
|
+
if [ ! -x "$JAVACMD" ] ; then
|
79
|
+
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
|
80
|
+
|
81
|
+
Please set the JAVA_HOME variable in your environment to match the
|
82
|
+
location of your Java installation."
|
83
|
+
fi
|
84
|
+
else
|
85
|
+
JAVACMD="java"
|
86
|
+
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
87
|
+
|
88
|
+
Please set the JAVA_HOME variable in your environment to match the
|
89
|
+
location of your Java installation."
|
90
|
+
fi
|
91
|
+
|
92
|
+
# Increase the maximum file descriptors if we can.
|
93
|
+
if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
|
94
|
+
MAX_FD_LIMIT=`ulimit -H -n`
|
95
|
+
if [ $? -eq 0 ] ; then
|
96
|
+
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
|
97
|
+
MAX_FD="$MAX_FD_LIMIT"
|
98
|
+
fi
|
99
|
+
ulimit -n $MAX_FD
|
100
|
+
if [ $? -ne 0 ] ; then
|
101
|
+
warn "Could not set maximum file descriptor limit: $MAX_FD"
|
102
|
+
fi
|
103
|
+
else
|
104
|
+
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
|
105
|
+
fi
|
106
|
+
fi
|
107
|
+
|
108
|
+
# For Darwin, add options to specify how the application appears in the dock
|
109
|
+
if $darwin; then
|
110
|
+
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
|
111
|
+
fi
|
112
|
+
|
113
|
+
# For Cygwin, switch paths to Windows format before running java
|
114
|
+
if $cygwin ; then
|
115
|
+
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
|
116
|
+
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
|
117
|
+
|
118
|
+
# We build the pattern for arguments to be converted via cygpath
|
119
|
+
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
|
120
|
+
SEP=""
|
121
|
+
for dir in $ROOTDIRSRAW ; do
|
122
|
+
ROOTDIRS="$ROOTDIRS$SEP$dir"
|
123
|
+
SEP="|"
|
124
|
+
done
|
125
|
+
OURCYGPATTERN="(^($ROOTDIRS))"
|
126
|
+
# Add a user-defined pattern to the cygpath arguments
|
127
|
+
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
|
128
|
+
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
|
129
|
+
fi
|
130
|
+
# Now convert the arguments - kludge to limit ourselves to /bin/sh
|
131
|
+
i=0
|
132
|
+
for arg in "$@" ; do
|
133
|
+
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
|
134
|
+
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
|
135
|
+
|
136
|
+
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
|
137
|
+
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
|
138
|
+
else
|
139
|
+
eval `echo args$i`="\"$arg\""
|
140
|
+
fi
|
141
|
+
i=$((i+1))
|
142
|
+
done
|
143
|
+
case $i in
|
144
|
+
(0) set -- ;;
|
145
|
+
(1) set -- "$args0" ;;
|
146
|
+
(2) set -- "$args0" "$args1" ;;
|
147
|
+
(3) set -- "$args0" "$args1" "$args2" ;;
|
148
|
+
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
|
149
|
+
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
|
150
|
+
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
|
151
|
+
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
|
152
|
+
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
|
153
|
+
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
|
154
|
+
esac
|
155
|
+
fi
|
156
|
+
|
157
|
+
# Build the JVM_OPTS array from DEFAULT_JVM_OPTS, JAVA_OPTS and GRADLE_OPTS.
# The values are expanded through eval so that quoting written inside those
# variables is honoured by the shell when they are split into words.
splitJvmOpts() {
    JVM_OPTS=("$@")
}
eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
# Append the application name as one more JVM system property.
JVM_OPTS+=("-Dorg.gradle.appname=$APP_BASE_NAME")

# Replace this shell with the JVM running the Gradle wrapper.
exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
|
data/gradlew.bat
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Windows launcher for the Gradle wrapper.
@rem
@rem  Flow: locate java.exe (JAVA_HOME first, then PATH), collect the command
@rem  line arguments, put gradle-wrapper.jar on the classpath and start
@rem  org.gradle.wrapper.GradleWrapperMain.  Any failure ends at :fail with
@rem  exit code 1.
@rem
@rem ##########################################################################

@rem Keep variable changes local to this script on Windows NT shells.
if "%OS%"=="Windows_NT" setlocal

@rem Default JVM options go here. JAVA_OPTS and GRADLE_OPTS can also be used to pass JVM options to this script.
set DEFAULT_JVM_OPTS=

@rem Directory and base name of this script; the directory is used as APP_HOME.
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Locate java.exe: prefer JAVA_HOME when it is defined, otherwise try the PATH.
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
@rem Strip any double quotes from JAVA_HOME before building the executable path.
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto init

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:init
@rem Collect the command-line arguments; the method depends on the shell variant.

if not "%OS%" == "Windows_NT" goto win9xME_args
if "%@eval[2+2]" == "4" goto 4NT_args

:win9xME_args
@rem Default path: take the arguments as given.
set CMD_LINE_ARGS=
set _SKIP=2

:win9xME_args_slurp
@rem Nothing to collect when no first argument was given.
if "x%~1" == "x" goto execute

set CMD_LINE_ARGS=%*
goto execute

:4NT_args
@rem The 4NT shell from JP Software exposes the arguments as %$.
set CMD_LINE_ARGS=%$

:execute
@rem The classpath holds only the wrapper jar shipped next to this script.

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar

@rem Launch the Gradle wrapper.
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%

:end
@rem A zero exit code from java skips the failure handling below.
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
@rem Close the local variable scope opened at the top of the script.
if "%OS%"=="Windows_NT" endlocal

:omega
|
@@ -0,0 +1,165 @@
|
|
1
|
+
package org.embulk.filter.icu4j;
|
2
|
+
|
3
|
+
import java.util.List;
|
4
|
+
import java.util.Map;
|
5
|
+
|
6
|
+
import org.embulk.config.Config;
|
7
|
+
import org.embulk.config.ConfigDefault;
|
8
|
+
import org.embulk.config.ConfigSource;
|
9
|
+
import org.embulk.config.Task;
|
10
|
+
import org.embulk.config.TaskSource;
|
11
|
+
import org.embulk.spi.Column;
|
12
|
+
import org.embulk.spi.Exec;
|
13
|
+
import org.embulk.spi.FilterPlugin;
|
14
|
+
import org.embulk.spi.Page;
|
15
|
+
import org.embulk.spi.PageBuilder;
|
16
|
+
import org.embulk.spi.PageOutput;
|
17
|
+
import org.embulk.spi.PageReader;
|
18
|
+
import org.embulk.spi.Schema;
|
19
|
+
import org.embulk.spi.type.Types;
|
20
|
+
|
21
|
+
import com.google.common.base.MoreObjects;
|
22
|
+
import com.google.common.collect.ImmutableList;
|
23
|
+
import com.google.common.collect.Lists;
|
24
|
+
import com.ibm.icu.text.Transliterator;
|
25
|
+
|
26
|
+
public class Icu4jFilterPlugin implements FilterPlugin
|
27
|
+
{
|
28
|
+
public interface PluginTask extends Task
|
29
|
+
{
|
30
|
+
@Config("key_names")
|
31
|
+
public List<String> getKeyNames();
|
32
|
+
|
33
|
+
@Config("keep_input")
|
34
|
+
@ConfigDefault("true")
|
35
|
+
public boolean getKeepInput();
|
36
|
+
|
37
|
+
@Config("settings")
|
38
|
+
public List<Map<String, String>> getSettings();
|
39
|
+
}
|
40
|
+
|
41
|
+
@Override
|
42
|
+
public void transaction(ConfigSource config, Schema inputSchema,
|
43
|
+
FilterPlugin.Control control)
|
44
|
+
{
|
45
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
46
|
+
|
47
|
+
ImmutableList.Builder<Column> builder = ImmutableList.builder();
|
48
|
+
int i = 0;
|
49
|
+
if (task.getKeepInput()) {
|
50
|
+
for (Column inputColumn: inputSchema.getColumns()) {
|
51
|
+
Column outputColumn = new Column(i++, inputColumn.getName(), inputColumn.getType());
|
52
|
+
builder.add(outputColumn);
|
53
|
+
}
|
54
|
+
}
|
55
|
+
|
56
|
+
for (String key: task.getKeyNames()) {
|
57
|
+
for (Map<String, String> setting : task.getSettings()) {
|
58
|
+
Column outputColumn = new Column(i++, key + MoreObjects.firstNonNull(setting.get("suffix"), ""), Types.STRING);
|
59
|
+
builder.add(outputColumn);
|
60
|
+
}
|
61
|
+
}
|
62
|
+
Schema outputSchema = new Schema(builder.build());
|
63
|
+
control.run(task.dump(), outputSchema);
|
64
|
+
}
|
65
|
+
|
66
|
+
@Override
|
67
|
+
public PageOutput open(TaskSource taskSource, final Schema inputSchema, final Schema outputSchema, final PageOutput output)
|
68
|
+
{
|
69
|
+
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
70
|
+
final List<Column> keyNameColumns = Lists.newArrayList();
|
71
|
+
for (String keyName : task.getKeyNames()) {
|
72
|
+
keyNameColumns.add(inputSchema.lookupColumn(keyName));
|
73
|
+
}
|
74
|
+
final List<List<Transliterator>> transliterators = Lists.newArrayList();
|
75
|
+
for (Map<String, String> setting : task.getSettings()) {
|
76
|
+
List<Transliterator> tokenizers = Lists.newArrayList();
|
77
|
+
for (String convertType : setting.get("transliterators").split(",")) {
|
78
|
+
Transliterator transliterator = Transliterator.getInstance(convertType);
|
79
|
+
tokenizers.add(transliterator);
|
80
|
+
}
|
81
|
+
transliterators.add(tokenizers);
|
82
|
+
}
|
83
|
+
|
84
|
+
return new PageOutput() {
|
85
|
+
private PageReader reader = new PageReader(inputSchema);
|
86
|
+
|
87
|
+
@Override
|
88
|
+
public void finish() {
|
89
|
+
output.finish();
|
90
|
+
}
|
91
|
+
|
92
|
+
@Override
|
93
|
+
public void close() {
|
94
|
+
output.close();
|
95
|
+
}
|
96
|
+
|
97
|
+
@Override
|
98
|
+
public void add(Page page) {
|
99
|
+
reader.setPage(page);
|
100
|
+
|
101
|
+
try (final PageBuilder builder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output)) {
|
102
|
+
while (reader.nextRecord()) {
|
103
|
+
setValue(builder);
|
104
|
+
builder.addRecord();
|
105
|
+
}
|
106
|
+
builder.finish();
|
107
|
+
}
|
108
|
+
}
|
109
|
+
|
110
|
+
/**
|
111
|
+
* @param builder
|
112
|
+
*/
|
113
|
+
private void setValue(PageBuilder builder) {
|
114
|
+
if (task.getKeepInput()) {
|
115
|
+
for (Column inputColumn: inputSchema.getColumns()) {
|
116
|
+
if (reader.isNull(inputColumn)) {
|
117
|
+
builder.setNull(inputColumn);
|
118
|
+
continue;
|
119
|
+
}
|
120
|
+
if (Types.STRING.equals(inputColumn.getType())) {
|
121
|
+
builder.setString(inputColumn, reader.getString(inputColumn));
|
122
|
+
} else if (Types.BOOLEAN.equals(inputColumn.getType())) {
|
123
|
+
builder.setBoolean(inputColumn, reader.getBoolean(inputColumn));
|
124
|
+
} else if (Types.DOUBLE.equals(inputColumn.getType())) {
|
125
|
+
builder.setDouble(inputColumn, reader.getDouble(inputColumn));
|
126
|
+
} else if (Types.LONG.equals(inputColumn.getType())) {
|
127
|
+
builder.setLong(inputColumn, reader.getLong(inputColumn));
|
128
|
+
} else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
|
129
|
+
builder.setTimestamp(inputColumn, reader.getTimestamp(inputColumn));
|
130
|
+
}
|
131
|
+
}
|
132
|
+
}
|
133
|
+
|
134
|
+
List<Map<String, String>> settings = task.getSettings();
|
135
|
+
for (Column column : keyNameColumns) {
|
136
|
+
for (int i = 0; i < settings.size(); i++) {
|
137
|
+
Map<String, String> setting = settings.get(i);
|
138
|
+
String suffix = setting.get("suffix");
|
139
|
+
Column outputColumn = outputSchema.lookupColumn(column.getName() + MoreObjects.firstNonNull(suffix, ""));
|
140
|
+
builder.setString(outputColumn, convert(column, suffix, setting.get("case"), transliterators.get(i)));
|
141
|
+
}
|
142
|
+
}
|
143
|
+
}
|
144
|
+
|
145
|
+
/**
|
146
|
+
* @param column
|
147
|
+
* @param suffix
|
148
|
+
* @param type
|
149
|
+
* @return
|
150
|
+
*/
|
151
|
+
private String convert(Column column, String suffix, String type, List<Transliterator> transliterators) {
|
152
|
+
String string = reader.getString(column);
|
153
|
+
for (Transliterator transliterator : transliterators) {
|
154
|
+
string = transliterator.transliterate(string);
|
155
|
+
}
|
156
|
+
if ("upper".equals(type)) {
|
157
|
+
string = string.toUpperCase();
|
158
|
+
} else if ("lower".equals(type)) {
|
159
|
+
string = string.toLowerCase();
|
160
|
+
}
|
161
|
+
return string;
|
162
|
+
}
|
163
|
+
};
|
164
|
+
}
|
165
|
+
}
|
metadata
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: embulk-filter-icu4j
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- toyama0919
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-10-01 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.0'
|
20
|
+
requirement: !ruby/object:Gem::Requirement
|
21
|
+
requirements:
|
22
|
+
- - ~>
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: '1.0'
|
25
|
+
prerelease: false
|
26
|
+
type: :development
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '10.0'
|
39
|
+
prerelease: false
|
40
|
+
type: :development
|
41
|
+
description: Icu4j filter plugin for Embulk. see http://site.icu-project.org/
|
42
|
+
email:
|
43
|
+
- toyama0919@gmail.com
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- .gitignore
|
49
|
+
- ICU_LICENSE.txt
|
50
|
+
- LICENSE.txt
|
51
|
+
- README.md
|
52
|
+
- build.gradle
|
53
|
+
- gradle/wrapper/gradle-wrapper.jar
|
54
|
+
- gradle/wrapper/gradle-wrapper.properties
|
55
|
+
- gradlew
|
56
|
+
- gradlew.bat
|
57
|
+
- lib/embulk/filter/icu4j.rb
|
58
|
+
- src/main/java/org/embulk/filter/icu4j/Icu4jFilterPlugin.java
|
59
|
+
- src/test/java/org/embulk/filter/icu4j/TestIcu4jFilterPlugin.java
|
60
|
+
- classpath/embulk-filter-icu4j-0.1.0.jar
|
61
|
+
homepage: https://github.com/toyama0919/embulk-filter-icu4j
|
62
|
+
licenses:
|
63
|
+
- MIT
|
64
|
+
metadata: {}
|
65
|
+
post_install_message:
|
66
|
+
rdoc_options: []
|
67
|
+
require_paths:
|
68
|
+
- lib
|
69
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
70
|
+
requirements:
|
71
|
+
- - '>='
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: '0'
|
74
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
|
+
requirements:
|
76
|
+
- - '>='
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '0'
|
79
|
+
requirements: []
|
80
|
+
rubyforge_project:
|
81
|
+
rubygems_version: 2.1.9
|
82
|
+
signing_key:
|
83
|
+
specification_version: 4
|
84
|
+
summary: Icu4j filter plugin for Embulk
|
85
|
+
test_files: []
|
86
|
+
has_rdoc:
|