embulk-filter-kuromoji 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/build.gradle +5 -5
- data/src/main/java/org/embulk/filter/kuromoji/KuromojiFilterPlugin.java +6 -4
- data/src/main/java/org/embulk/filter/kuromoji/KuromojiPageOutput.java +42 -20
- data/src/main/java/org/embulk/filter/kuromoji/NeologdPageOutput.java +46 -24
- data/src/main/java/org/embulk/filter/kuromoji/Token.java +20 -10
- metadata +6 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f761d94b92551164712a27d55503e5fb44bf9530
|
4
|
+
data.tar.gz: 71d3c24850363da272c604e1bbffe62821c1e1f9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 95ffe8e33a1b6c0d2be2c7985dcc8474e786dcbc82899a89e5091155f4a2e3b4141a5308bd38807e4644f839a0ee91fae73d5a498f0e16b64a2cab98607534f9
|
7
|
+
data.tar.gz: ac70dbc1e7f830758a5da0ee46df782f3b95af2367184b78f511405621d9273b7e86b907f33987dbfaed4cdcf9ffdd3c62381d16c07401ab8df346c0023c01a5
|
data/build.gradle
CHANGED
@@ -17,18 +17,18 @@ configurations {
|
|
17
17
|
provided
|
18
18
|
}
|
19
19
|
|
20
|
-
version = "0.4.0"
|
20
|
+
version = "0.5.0"
|
21
21
|
|
22
22
|
sourceCompatibility = 1.7
|
23
23
|
targetCompatibility = 1.7
|
24
24
|
|
25
25
|
dependencies {
|
26
|
-
compile "org.embulk:embulk-core:0.8.
|
26
|
+
compile "org.embulk:embulk-core:0.8.15"
|
27
27
|
compile 'com.atilika.kuromoji:kuromoji-ipadic:0.9.0'
|
28
|
-
compile "org.codelibs:lucene-analyzers-kuromoji-ipadic-neologd:
|
29
|
-
provided "org.embulk:embulk-core:0.8.
|
28
|
+
compile "org.codelibs:lucene-analyzers-kuromoji-ipadic-neologd:6.2.1-20161201"
|
29
|
+
provided "org.embulk:embulk-core:0.8.15"
|
30
30
|
testCompile "junit:junit:4.+"
|
31
|
-
testCompile "org.embulk:embulk-core:0.8.
|
31
|
+
testCompile "org.embulk:embulk-core:0.8.15"
|
32
32
|
}
|
33
33
|
|
34
34
|
task classpath(type: Copy, dependsOn: ["jar"]) {
|
@@ -75,7 +75,7 @@ public class KuromojiFilterPlugin implements FilterPlugin
|
|
75
75
|
{
|
76
76
|
final String tokenizer = taskSource.loadTask(PluginTask.class).getTokenizer();
|
77
77
|
logger.info("Tokenizer => {}", tokenizer);
|
78
|
-
if (tokenizer.equals("neologd")){
|
78
|
+
if (tokenizer.equals("neologd")) {
|
79
79
|
return new NeologdPageOutput(taskSource, inputSchema, outputSchema, output);
|
80
80
|
}
|
81
81
|
return new KuromojiPageOutput(taskSource, inputSchema, outputSchema, output);
|
@@ -86,7 +86,8 @@ public class KuromojiFilterPlugin implements FilterPlugin
|
|
86
86
|
* @param task
|
87
87
|
* @return
|
88
88
|
*/
|
89
|
-
private Schema buildOutputSchema(PluginTask task, Schema inputSchema)
|
89
|
+
private Schema buildOutputSchema(PluginTask task, Schema inputSchema)
|
90
|
+
{
|
90
91
|
final List<Column> outputColumns = buildOutputColumns(task, inputSchema);
|
91
92
|
logger.debug("outputColumns => {}", outputColumns);
|
92
93
|
return new Schema(outputColumns);
|
@@ -97,7 +98,8 @@ public class KuromojiFilterPlugin implements FilterPlugin
|
|
97
98
|
* @param inputSchema
|
98
99
|
* @return
|
99
100
|
*/
|
100
|
-
private List<Column> buildOutputColumns(PluginTask task, Schema inputSchema)
|
101
|
+
private List<Column> buildOutputColumns(PluginTask task, Schema inputSchema)
|
102
|
+
{
|
101
103
|
ImmutableList.Builder<Column> builder = ImmutableList.builder();
|
102
104
|
Map<String, Column> map = Maps.newLinkedHashMap();
|
103
105
|
int i = 0;
|
@@ -117,7 +119,7 @@ public class KuromojiFilterPlugin implements FilterPlugin
|
|
117
119
|
}
|
118
120
|
|
119
121
|
i = 0;
|
120
|
-
for(Map.Entry<String, Column> e : map.entrySet()) {
|
122
|
+
for (Map.Entry<String, Column> e : map.entrySet()) {
|
121
123
|
final Column column = e.getValue();
|
122
124
|
builder.add(new Column(i++, column.getName(), column.getType()));
|
123
125
|
}
|
@@ -39,7 +39,8 @@ public class KuromojiPageOutput implements PageOutput
|
|
39
39
|
private final Schema outputSchema;
|
40
40
|
private static final Logger logger = Exec.getLogger(KuromojiFilterPlugin.class);
|
41
41
|
|
42
|
-
public KuromojiPageOutput(TaskSource taskSource, Schema inputSchema, Schema outputSchema, PageOutput output)
|
42
|
+
public KuromojiPageOutput(TaskSource taskSource, Schema inputSchema, Schema outputSchema, PageOutput output)
|
43
|
+
{
|
43
44
|
this.task = taskSource.loadTask(PluginTask.class);
|
44
45
|
this.inputSchema = inputSchema;
|
45
46
|
this.outputSchema = outputSchema;
|
@@ -48,9 +49,11 @@ public class KuromojiPageOutput implements PageOutput
|
|
48
49
|
if (task.getDictionaryPath().isPresent()) {
|
49
50
|
try {
|
50
51
|
builder.userDictionary(task.getDictionaryPath().get());
|
51
|
-
}
|
52
|
+
}
|
53
|
+
catch (FileNotFoundException e) {
|
52
54
|
e.printStackTrace();
|
53
|
-
}
|
55
|
+
}
|
56
|
+
catch (IOException e) {
|
54
57
|
e.printStackTrace();
|
55
58
|
}
|
56
59
|
}
|
@@ -58,9 +61,11 @@ public class KuromojiPageOutput implements PageOutput
|
|
58
61
|
Mode mode = null;
|
59
62
|
if (task.getMode().equals("normal")) {
|
60
63
|
mode = Mode.NORMAL;
|
61
|
-
}
|
64
|
+
}
|
65
|
+
else if (task.getMode().equals("search")) {
|
62
66
|
mode = Mode.SEARCH;
|
63
|
-
}
|
67
|
+
}
|
68
|
+
else if (task.getMode().equals("extended")) {
|
64
69
|
mode = Mode.EXTENDED;
|
65
70
|
}
|
66
71
|
|
@@ -76,17 +81,20 @@ public class KuromojiPageOutput implements PageOutput
|
|
76
81
|
}
|
77
82
|
|
78
83
|
@Override
|
79
|
-
public void finish()
|
84
|
+
public void finish()
|
85
|
+
{
|
80
86
|
builder.finish();
|
81
87
|
}
|
82
88
|
|
83
89
|
@Override
|
84
|
-
public void close()
|
90
|
+
public void close()
|
91
|
+
{
|
85
92
|
builder.close();
|
86
93
|
}
|
87
94
|
|
88
95
|
@Override
|
89
|
-
public void add(Page page)
|
96
|
+
public void add(Page page)
|
97
|
+
{
|
90
98
|
reader.setPage(page);
|
91
99
|
while (reader.nextRecord()) {
|
92
100
|
setValue(builder);
|
@@ -97,7 +105,8 @@ public class KuromojiPageOutput implements PageOutput
|
|
97
105
|
/**
|
98
106
|
* @param builder
|
99
107
|
*/
|
100
|
-
private void setValue(PageBuilder builder)
|
108
|
+
private void setValue(PageBuilder builder)
|
109
|
+
{
|
101
110
|
if (task.getKeepInput()) {
|
102
111
|
for (Column inputColumn : inputSchema.getColumns()) {
|
103
112
|
if (reader.isNull(inputColumn)) {
|
@@ -106,15 +115,20 @@ public class KuromojiPageOutput implements PageOutput
|
|
106
115
|
}
|
107
116
|
if (Types.STRING.equals(inputColumn.getType())) {
|
108
117
|
builder.setString(inputColumn, reader.getString(inputColumn));
|
109
|
-
}
|
118
|
+
}
|
119
|
+
else if (Types.BOOLEAN.equals(inputColumn.getType())) {
|
110
120
|
builder.setBoolean(inputColumn, reader.getBoolean(inputColumn));
|
111
|
-
}
|
121
|
+
}
|
122
|
+
else if (Types.DOUBLE.equals(inputColumn.getType())) {
|
112
123
|
builder.setDouble(inputColumn, reader.getDouble(inputColumn));
|
113
|
-
}
|
124
|
+
}
|
125
|
+
else if (Types.LONG.equals(inputColumn.getType())) {
|
114
126
|
builder.setLong(inputColumn, reader.getLong(inputColumn));
|
115
|
-
}
|
127
|
+
}
|
128
|
+
else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
|
116
129
|
builder.setTimestamp(inputColumn, reader.getTimestamp(inputColumn));
|
117
|
-
}
|
130
|
+
}
|
131
|
+
else if (Types.JSON.equals(inputColumn.getType())) {
|
118
132
|
builder.setJson(inputColumn, reader.getJson(inputColumn));
|
119
133
|
}
|
120
134
|
}
|
@@ -131,13 +145,17 @@ public class KuromojiPageOutput implements PageOutput
|
|
131
145
|
List<Value> outputs = Lists.newArrayList();
|
132
146
|
for (Token token : tokens) {
|
133
147
|
logger.debug("token => {}, {}", token, token.getAllFeatures());
|
134
|
-
if (!isOkPartsOfSpeech(token)) {
|
148
|
+
if (!isOkPartsOfSpeech(token)) {
|
149
|
+
continue;
|
150
|
+
}
|
135
151
|
String word = null;
|
136
152
|
if ("base_form".equals(method)) {
|
137
153
|
word = MoreObjects.firstNonNull(token.getBaseForm(), token.getSurface());
|
138
|
-
}
|
154
|
+
}
|
155
|
+
else if ("reading".equals(method)) {
|
139
156
|
word = MoreObjects.firstNonNull(token.getReading(), token.getSurface());
|
140
|
-
}
|
157
|
+
}
|
158
|
+
else if ("surface_form".equals(method)) {
|
141
159
|
word = token.getSurface();
|
142
160
|
}
|
143
161
|
outputs.add(ValueFactory.newString(word));
|
@@ -145,15 +163,19 @@ public class KuromojiPageOutput implements PageOutput
|
|
145
163
|
if (outputColumn.getType().equals(Types.STRING)) {
|
146
164
|
Joiner joiner = Joiner.on(MoreObjects.firstNonNull(setting.get("delimiter"), ",")).skipNulls();
|
147
165
|
builder.setString(outputColumn, joiner.join(outputs));
|
148
|
-
}
|
166
|
+
}
|
167
|
+
else if (outputColumn.getType().equals(Types.JSON)) {
|
149
168
|
builder.setJson(outputColumn, ValueFactory.newArray(outputs));
|
150
169
|
}
|
151
170
|
}
|
152
171
|
}
|
153
172
|
}
|
154
173
|
|
155
|
-
private boolean isOkPartsOfSpeech(Token token)
|
156
|
-
|
174
|
+
private boolean isOkPartsOfSpeech(Token token)
|
175
|
+
{
|
176
|
+
if (!task.getOkPartsOfSpeech().isPresent()) {
|
177
|
+
return true;
|
178
|
+
}
|
157
179
|
for (String okPartsOfSpeech : task.getOkPartsOfSpeech().get()) {
|
158
180
|
if (token.getAllFeaturesArray()[0].equals(okPartsOfSpeech)) {
|
159
181
|
return true;
|
@@ -9,9 +9,9 @@ import java.util.List;
|
|
9
9
|
import java.util.Map;
|
10
10
|
import java.util.Set;
|
11
11
|
|
12
|
+
import org.apache.lucene.analysis.CharArraySet;
|
12
13
|
import org.apache.lucene.analysis.TokenStream;
|
13
14
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
14
|
-
import org.apache.lucene.analysis.util.CharArraySet;
|
15
15
|
import org.codelibs.neologd.ipadic.lucene.analysis.ja.JapaneseAnalyzer;
|
16
16
|
import org.codelibs.neologd.ipadic.lucene.analysis.ja.JapaneseTokenizer;
|
17
17
|
import org.codelibs.neologd.ipadic.lucene.analysis.ja.JapaneseTokenizer.Mode;
|
@@ -39,7 +39,6 @@ import com.google.common.base.MoreObjects;
|
|
39
39
|
import com.google.common.collect.Lists;
|
40
40
|
import com.google.common.collect.Sets;
|
41
41
|
|
42
|
-
|
43
42
|
public class NeologdPageOutput implements PageOutput
|
44
43
|
{
|
45
44
|
private final KuromojiFilterPlugin.PluginTask task;
|
@@ -51,7 +50,8 @@ public class NeologdPageOutput implements PageOutput
|
|
51
50
|
private final JapaneseAnalyzer japaneseAnalyzer;
|
52
51
|
private static final Logger logger = Exec.getLogger(KuromojiFilterPlugin.class);
|
53
52
|
|
54
|
-
public NeologdPageOutput(TaskSource taskSource, Schema inputSchema, Schema outputSchema, PageOutput output)
|
53
|
+
public NeologdPageOutput(TaskSource taskSource, Schema inputSchema, Schema outputSchema, PageOutput output)
|
54
|
+
{
|
55
55
|
this.task = taskSource.loadTask(PluginTask.class);
|
56
56
|
this.inputSchema = inputSchema;
|
57
57
|
this.outputSchema = outputSchema;
|
@@ -69,7 +69,8 @@ public class NeologdPageOutput implements PageOutput
|
|
69
69
|
File file = new File(task.getDictionaryPath().get());
|
70
70
|
Reader reader = new InputStreamReader(new FileInputStream(file), Charsets.UTF_8);
|
71
71
|
userDict = UserDictionary.open(reader);
|
72
|
-
}
|
72
|
+
}
|
73
|
+
catch (Exception e) {
|
73
74
|
logger.error("neologd error", e);
|
74
75
|
}
|
75
76
|
}
|
@@ -77,9 +78,11 @@ public class NeologdPageOutput implements PageOutput
|
|
77
78
|
Mode mode = null;
|
78
79
|
if (task.getMode().equals("normal")) {
|
79
80
|
mode = JapaneseTokenizer.Mode.NORMAL;
|
80
|
-
}
|
81
|
+
}
|
82
|
+
else if (task.getMode().equals("search")) {
|
81
83
|
mode = JapaneseTokenizer.Mode.SEARCH;
|
82
|
-
}
|
84
|
+
}
|
85
|
+
else if (task.getMode().equals("extended")) {
|
83
86
|
mode = JapaneseTokenizer.Mode.EXTENDED;
|
84
87
|
}
|
85
88
|
|
@@ -93,17 +96,20 @@ public class NeologdPageOutput implements PageOutput
|
|
93
96
|
}
|
94
97
|
|
95
98
|
@Override
|
96
|
-
public void finish()
|
99
|
+
public void finish()
|
100
|
+
{
|
97
101
|
builder.finish();
|
98
102
|
}
|
99
103
|
|
100
104
|
@Override
|
101
|
-
public void close()
|
105
|
+
public void close()
|
106
|
+
{
|
102
107
|
builder.close();
|
103
108
|
}
|
104
109
|
|
105
110
|
@Override
|
106
|
-
public void add(Page page)
|
111
|
+
public void add(Page page)
|
112
|
+
{
|
107
113
|
reader.setPage(page);
|
108
114
|
while (reader.nextRecord()) {
|
109
115
|
setValue(builder);
|
@@ -114,7 +120,8 @@ public class NeologdPageOutput implements PageOutput
|
|
114
120
|
/**
|
115
121
|
* @param builder
|
116
122
|
*/
|
117
|
-
private void setValue(PageBuilder builder)
|
123
|
+
private void setValue(PageBuilder builder)
|
124
|
+
{
|
118
125
|
if (task.getKeepInput()) {
|
119
126
|
for (Column inputColumn : inputSchema.getColumns()) {
|
120
127
|
if (reader.isNull(inputColumn)) {
|
@@ -123,15 +130,20 @@ public class NeologdPageOutput implements PageOutput
|
|
123
130
|
}
|
124
131
|
if (Types.STRING.equals(inputColumn.getType())) {
|
125
132
|
builder.setString(inputColumn, reader.getString(inputColumn));
|
126
|
-
}
|
133
|
+
}
|
134
|
+
else if (Types.BOOLEAN.equals(inputColumn.getType())) {
|
127
135
|
builder.setBoolean(inputColumn, reader.getBoolean(inputColumn));
|
128
|
-
}
|
136
|
+
}
|
137
|
+
else if (Types.DOUBLE.equals(inputColumn.getType())) {
|
129
138
|
builder.setDouble(inputColumn, reader.getDouble(inputColumn));
|
130
|
-
}
|
139
|
+
}
|
140
|
+
else if (Types.LONG.equals(inputColumn.getType())) {
|
131
141
|
builder.setLong(inputColumn, reader.getLong(inputColumn));
|
132
|
-
}
|
142
|
+
}
|
143
|
+
else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
|
133
144
|
builder.setTimestamp(inputColumn, reader.getTimestamp(inputColumn));
|
134
|
-
}
|
145
|
+
}
|
146
|
+
else if (Types.JSON.equals(inputColumn.getType())) {
|
135
147
|
builder.setJson(inputColumn, reader.getJson(inputColumn));
|
136
148
|
}
|
137
149
|
}
|
@@ -149,9 +161,11 @@ public class NeologdPageOutput implements PageOutput
|
|
149
161
|
String word = null;
|
150
162
|
if ("base_form".equals(method)) {
|
151
163
|
word = token.getBaseForm();
|
152
|
-
}
|
164
|
+
}
|
165
|
+
else if ("reading".equals(method)) {
|
153
166
|
word = token.getReading();
|
154
|
-
}
|
167
|
+
}
|
168
|
+
else if ("surface_form".equals(method)) {
|
155
169
|
word = token.getCharTerm();
|
156
170
|
}
|
157
171
|
if (word != null) {
|
@@ -161,16 +175,20 @@ public class NeologdPageOutput implements PageOutput
|
|
161
175
|
if (outputColumn.getType().equals(Types.STRING)) {
|
162
176
|
Joiner joiner = Joiner.on(MoreObjects.firstNonNull(setting.get("delimiter"), ",")).skipNulls();
|
163
177
|
builder.setString(outputColumn, joiner.join(outputs));
|
164
|
-
}
|
178
|
+
}
|
179
|
+
else if (outputColumn.getType().equals(Types.JSON)) {
|
165
180
|
builder.setJson(outputColumn, ValueFactory.newArray(outputs));
|
166
181
|
}
|
167
182
|
}
|
168
183
|
}
|
169
184
|
}
|
170
185
|
|
171
|
-
private boolean isOkPartsOfSpeech(Token token)
|
186
|
+
private boolean isOkPartsOfSpeech(Token token)
|
187
|
+
{
|
172
188
|
logger.debug("{} => {}", token.getCharTerm(), token.getPartOfSpeech());
|
173
|
-
if (!task.getOkPartsOfSpeech().isPresent()) {
|
189
|
+
if (!task.getOkPartsOfSpeech().isPresent()) {
|
190
|
+
return true;
|
191
|
+
}
|
174
192
|
for (String okPartsOfSpeech : task.getOkPartsOfSpeech().get()) {
|
175
193
|
if (token.getPartOfSpeech().startsWith(okPartsOfSpeech)) {
|
176
194
|
return true;
|
@@ -179,9 +197,10 @@ public class NeologdPageOutput implements PageOutput
|
|
179
197
|
return false;
|
180
198
|
}
|
181
199
|
|
182
|
-
private List<Token> tokenize(Reader reader)
|
200
|
+
private List<Token> tokenize(Reader reader)
|
201
|
+
{
|
183
202
|
List<Token> list = Lists.newArrayList();
|
184
|
-
try (TokenStream tokenStream = japaneseAnalyzer.tokenStream("", reader)
|
203
|
+
try (TokenStream tokenStream = japaneseAnalyzer.tokenStream("", reader)) {
|
185
204
|
BaseFormAttribute baseAttr = tokenStream.addAttribute(BaseFormAttribute.class);
|
186
205
|
CharTermAttribute charAttr = tokenStream.addAttribute(CharTermAttribute.class);
|
187
206
|
PartOfSpeechAttribute posAttr = tokenStream.addAttribute(PartOfSpeechAttribute.class);
|
@@ -194,10 +213,13 @@ public class NeologdPageOutput implements PageOutput
|
|
194
213
|
token.setBaseForm(baseAttr.getBaseForm());
|
195
214
|
token.setReading(readAttr.getReading());
|
196
215
|
token.setPartOfSpeech(posAttr.getPartOfSpeech());
|
197
|
-
if (!isOkPartsOfSpeech(token)) {
|
216
|
+
if (!isOkPartsOfSpeech(token)) {
|
217
|
+
continue;
|
218
|
+
}
|
198
219
|
list.add(token);
|
199
220
|
}
|
200
|
-
}
|
221
|
+
}
|
222
|
+
catch (Exception e) {
|
201
223
|
logger.error("neologd error", e);
|
202
224
|
}
|
203
225
|
return list;
|
@@ -8,34 +8,44 @@ public class Token
|
|
8
8
|
private String reading;
|
9
9
|
private String inflection;
|
10
10
|
|
11
|
-
public String getCharTerm()
|
11
|
+
public String getCharTerm()
|
12
|
+
{
|
12
13
|
return charTerm;
|
13
14
|
}
|
14
|
-
public String getBaseForm()
|
15
|
+
public String getBaseForm()
|
16
|
+
{
|
15
17
|
return baseForm;
|
16
18
|
}
|
17
|
-
public String getPartOfSpeech()
|
19
|
+
public String getPartOfSpeech()
|
20
|
+
{
|
18
21
|
return partOfSpeech;
|
19
22
|
}
|
20
|
-
public void setCharTerm(String charTerm)
|
23
|
+
public void setCharTerm(String charTerm)
|
24
|
+
{
|
21
25
|
this.charTerm = charTerm;
|
22
26
|
}
|
23
|
-
public void setBaseForm(String baseForm)
|
27
|
+
public void setBaseForm(String baseForm)
|
28
|
+
{
|
24
29
|
this.baseForm = baseForm;
|
25
30
|
}
|
26
|
-
public void setPartOfSpeech(String partOfSpeech)
|
31
|
+
public void setPartOfSpeech(String partOfSpeech)
|
32
|
+
{
|
27
33
|
this.partOfSpeech = partOfSpeech;
|
28
34
|
}
|
29
|
-
public void setReading(String reading)
|
35
|
+
public void setReading(String reading)
|
36
|
+
{
|
30
37
|
this.reading = reading;
|
31
38
|
}
|
32
|
-
public String getReading()
|
39
|
+
public String getReading()
|
40
|
+
{
|
33
41
|
return reading;
|
34
42
|
}
|
35
|
-
public String getInflection()
|
43
|
+
public String getInflection()
|
44
|
+
{
|
36
45
|
return inflection;
|
37
46
|
}
|
38
|
-
public void setInflection(String inflection)
|
47
|
+
public void setInflection(String inflection)
|
48
|
+
{
|
39
49
|
this.inflection = inflection;
|
40
50
|
}
|
41
51
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-kuromoji
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- toyama0919
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-12-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -61,12 +61,12 @@ files:
|
|
61
61
|
- src/main/java/org/embulk/filter/kuromoji/NeologdPageOutput.java
|
62
62
|
- src/main/java/org/embulk/filter/kuromoji/Token.java
|
63
63
|
- src/test/java/org/embulk/filter/kuromoji/TestKuromojiFilterPlugin.java
|
64
|
-
- classpath/embulk-filter-kuromoji-0.4.0.jar
|
64
|
+
- classpath/embulk-filter-kuromoji-0.5.0.jar
|
65
65
|
- classpath/kuromoji-core-0.9.0.jar
|
66
66
|
- classpath/kuromoji-ipadic-0.9.0.jar
|
67
|
-
- classpath/lucene-analyzers-common-
|
68
|
-
- classpath/lucene-analyzers-kuromoji-ipadic-neologd-
|
69
|
-
- classpath/lucene-core-
|
67
|
+
- classpath/lucene-analyzers-common-6.2.1.jar
|
68
|
+
- classpath/lucene-analyzers-kuromoji-ipadic-neologd-6.2.1-20161201.jar
|
69
|
+
- classpath/lucene-core-6.2.1.jar
|
70
70
|
homepage: https://github.com/toyama0919/embulk-filter-kuromoji
|
71
71
|
licenses:
|
72
72
|
- MIT
|
@@ -92,4 +92,3 @@ signing_key:
|
|
92
92
|
specification_version: 4
|
93
93
|
summary: Kuromoji filter plugin for Embulk. Neologd support.
|
94
94
|
test_files: []
|
95
|
-
has_rdoc:
|