embulk-filter-kuromoji 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/build.gradle +5 -5
- data/src/main/java/org/embulk/filter/kuromoji/KuromojiFilterPlugin.java +6 -4
- data/src/main/java/org/embulk/filter/kuromoji/KuromojiPageOutput.java +42 -20
- data/src/main/java/org/embulk/filter/kuromoji/NeologdPageOutput.java +46 -24
- data/src/main/java/org/embulk/filter/kuromoji/Token.java +20 -10
- metadata +6 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f761d94b92551164712a27d55503e5fb44bf9530
|
4
|
+
data.tar.gz: 71d3c24850363da272c604e1bbffe62821c1e1f9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 95ffe8e33a1b6c0d2be2c7985dcc8474e786dcbc82899a89e5091155f4a2e3b4141a5308bd38807e4644f839a0ee91fae73d5a498f0e16b64a2cab98607534f9
|
7
|
+
data.tar.gz: ac70dbc1e7f830758a5da0ee46df782f3b95af2367184b78f511405621d9273b7e86b907f33987dbfaed4cdcf9ffdd3c62381d16c07401ab8df346c0023c01a5
|
data/build.gradle
CHANGED
@@ -17,18 +17,18 @@ configurations {
|
|
17
17
|
provided
|
18
18
|
}
|
19
19
|
|
20
|
-
version = "0.4.0"
|
20
|
+
version = "0.5.0"
|
21
21
|
|
22
22
|
sourceCompatibility = 1.7
|
23
23
|
targetCompatibility = 1.7
|
24
24
|
|
25
25
|
dependencies {
|
26
|
-
compile "org.embulk:embulk-core:0.8.
|
26
|
+
compile "org.embulk:embulk-core:0.8.15"
|
27
27
|
compile 'com.atilika.kuromoji:kuromoji-ipadic:0.9.0'
|
28
|
-
compile "org.codelibs:lucene-analyzers-kuromoji-ipadic-neologd:
|
29
|
-
provided "org.embulk:embulk-core:0.8.
|
28
|
+
compile "org.codelibs:lucene-analyzers-kuromoji-ipadic-neologd:6.2.1-20161201"
|
29
|
+
provided "org.embulk:embulk-core:0.8.15"
|
30
30
|
testCompile "junit:junit:4.+"
|
31
|
-
testCompile "org.embulk:embulk-core:0.8.
|
31
|
+
testCompile "org.embulk:embulk-core:0.8.15"
|
32
32
|
}
|
33
33
|
|
34
34
|
task classpath(type: Copy, dependsOn: ["jar"]) {
|
@@ -75,7 +75,7 @@ public class KuromojiFilterPlugin implements FilterPlugin
|
|
75
75
|
{
|
76
76
|
final String tokenizer = taskSource.loadTask(PluginTask.class).getTokenizer();
|
77
77
|
logger.info("Tokenizer => {}", tokenizer);
|
78
|
-
if (tokenizer.equals("neologd")){
|
78
|
+
if (tokenizer.equals("neologd")) {
|
79
79
|
return new NeologdPageOutput(taskSource, inputSchema, outputSchema, output);
|
80
80
|
}
|
81
81
|
return new KuromojiPageOutput(taskSource, inputSchema, outputSchema, output);
|
@@ -86,7 +86,8 @@ public class KuromojiFilterPlugin implements FilterPlugin
|
|
86
86
|
* @param task
|
87
87
|
* @return
|
88
88
|
*/
|
89
|
-
private Schema buildOutputSchema(PluginTask task, Schema inputSchema)
|
89
|
+
private Schema buildOutputSchema(PluginTask task, Schema inputSchema)
|
90
|
+
{
|
90
91
|
final List<Column> outputColumns = buildOutputColumns(task, inputSchema);
|
91
92
|
logger.debug("outputColumns => {}", outputColumns);
|
92
93
|
return new Schema(outputColumns);
|
@@ -97,7 +98,8 @@ public class KuromojiFilterPlugin implements FilterPlugin
|
|
97
98
|
* @param inputSchema
|
98
99
|
* @return
|
99
100
|
*/
|
100
|
-
private List<Column> buildOutputColumns(PluginTask task, Schema inputSchema)
|
101
|
+
private List<Column> buildOutputColumns(PluginTask task, Schema inputSchema)
|
102
|
+
{
|
101
103
|
ImmutableList.Builder<Column> builder = ImmutableList.builder();
|
102
104
|
Map<String, Column> map = Maps.newLinkedHashMap();
|
103
105
|
int i = 0;
|
@@ -117,7 +119,7 @@ public class KuromojiFilterPlugin implements FilterPlugin
|
|
117
119
|
}
|
118
120
|
|
119
121
|
i = 0;
|
120
|
-
for(Map.Entry<String, Column> e : map.entrySet()) {
|
122
|
+
for (Map.Entry<String, Column> e : map.entrySet()) {
|
121
123
|
final Column column = e.getValue();
|
122
124
|
builder.add(new Column(i++, column.getName(), column.getType()));
|
123
125
|
}
|
@@ -39,7 +39,8 @@ public class KuromojiPageOutput implements PageOutput
|
|
39
39
|
private final Schema outputSchema;
|
40
40
|
private static final Logger logger = Exec.getLogger(KuromojiFilterPlugin.class);
|
41
41
|
|
42
|
-
public KuromojiPageOutput(TaskSource taskSource, Schema inputSchema, Schema outputSchema, PageOutput output)
|
42
|
+
public KuromojiPageOutput(TaskSource taskSource, Schema inputSchema, Schema outputSchema, PageOutput output)
|
43
|
+
{
|
43
44
|
this.task = taskSource.loadTask(PluginTask.class);
|
44
45
|
this.inputSchema = inputSchema;
|
45
46
|
this.outputSchema = outputSchema;
|
@@ -48,9 +49,11 @@ public class KuromojiPageOutput implements PageOutput
|
|
48
49
|
if (task.getDictionaryPath().isPresent()) {
|
49
50
|
try {
|
50
51
|
builder.userDictionary(task.getDictionaryPath().get());
|
51
|
-
}
|
52
|
+
}
|
53
|
+
catch (FileNotFoundException e) {
|
52
54
|
e.printStackTrace();
|
53
|
-
}
|
55
|
+
}
|
56
|
+
catch (IOException e) {
|
54
57
|
e.printStackTrace();
|
55
58
|
}
|
56
59
|
}
|
@@ -58,9 +61,11 @@ public class KuromojiPageOutput implements PageOutput
|
|
58
61
|
Mode mode = null;
|
59
62
|
if (task.getMode().equals("normal")) {
|
60
63
|
mode = Mode.NORMAL;
|
61
|
-
}
|
64
|
+
}
|
65
|
+
else if (task.getMode().equals("search")) {
|
62
66
|
mode = Mode.SEARCH;
|
63
|
-
}
|
67
|
+
}
|
68
|
+
else if (task.getMode().equals("extended")) {
|
64
69
|
mode = Mode.EXTENDED;
|
65
70
|
}
|
66
71
|
|
@@ -76,17 +81,20 @@ public class KuromojiPageOutput implements PageOutput
|
|
76
81
|
}
|
77
82
|
|
78
83
|
@Override
|
79
|
-
public void finish()
|
84
|
+
public void finish()
|
85
|
+
{
|
80
86
|
builder.finish();
|
81
87
|
}
|
82
88
|
|
83
89
|
@Override
|
84
|
-
public void close()
|
90
|
+
public void close()
|
91
|
+
{
|
85
92
|
builder.close();
|
86
93
|
}
|
87
94
|
|
88
95
|
@Override
|
89
|
-
public void add(Page page)
|
96
|
+
public void add(Page page)
|
97
|
+
{
|
90
98
|
reader.setPage(page);
|
91
99
|
while (reader.nextRecord()) {
|
92
100
|
setValue(builder);
|
@@ -97,7 +105,8 @@ public class KuromojiPageOutput implements PageOutput
|
|
97
105
|
/**
|
98
106
|
* @param builder
|
99
107
|
*/
|
100
|
-
private void setValue(PageBuilder builder)
|
108
|
+
private void setValue(PageBuilder builder)
|
109
|
+
{
|
101
110
|
if (task.getKeepInput()) {
|
102
111
|
for (Column inputColumn : inputSchema.getColumns()) {
|
103
112
|
if (reader.isNull(inputColumn)) {
|
@@ -106,15 +115,20 @@ public class KuromojiPageOutput implements PageOutput
|
|
106
115
|
}
|
107
116
|
if (Types.STRING.equals(inputColumn.getType())) {
|
108
117
|
builder.setString(inputColumn, reader.getString(inputColumn));
|
109
|
-
}
|
118
|
+
}
|
119
|
+
else if (Types.BOOLEAN.equals(inputColumn.getType())) {
|
110
120
|
builder.setBoolean(inputColumn, reader.getBoolean(inputColumn));
|
111
|
-
}
|
121
|
+
}
|
122
|
+
else if (Types.DOUBLE.equals(inputColumn.getType())) {
|
112
123
|
builder.setDouble(inputColumn, reader.getDouble(inputColumn));
|
113
|
-
}
|
124
|
+
}
|
125
|
+
else if (Types.LONG.equals(inputColumn.getType())) {
|
114
126
|
builder.setLong(inputColumn, reader.getLong(inputColumn));
|
115
|
-
}
|
127
|
+
}
|
128
|
+
else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
|
116
129
|
builder.setTimestamp(inputColumn, reader.getTimestamp(inputColumn));
|
117
|
-
}
|
130
|
+
}
|
131
|
+
else if (Types.JSON.equals(inputColumn.getType())) {
|
118
132
|
builder.setJson(inputColumn, reader.getJson(inputColumn));
|
119
133
|
}
|
120
134
|
}
|
@@ -131,13 +145,17 @@ public class KuromojiPageOutput implements PageOutput
|
|
131
145
|
List<Value> outputs = Lists.newArrayList();
|
132
146
|
for (Token token : tokens) {
|
133
147
|
logger.debug("token => {}, {}", token, token.getAllFeatures());
|
134
|
-
if (!isOkPartsOfSpeech(token)) {
|
148
|
+
if (!isOkPartsOfSpeech(token)) {
|
149
|
+
continue;
|
150
|
+
}
|
135
151
|
String word = null;
|
136
152
|
if ("base_form".equals(method)) {
|
137
153
|
word = MoreObjects.firstNonNull(token.getBaseForm(), token.getSurface());
|
138
|
-
}
|
154
|
+
}
|
155
|
+
else if ("reading".equals(method)) {
|
139
156
|
word = MoreObjects.firstNonNull(token.getReading(), token.getSurface());
|
140
|
-
}
|
157
|
+
}
|
158
|
+
else if ("surface_form".equals(method)) {
|
141
159
|
word = token.getSurface();
|
142
160
|
}
|
143
161
|
outputs.add(ValueFactory.newString(word));
|
@@ -145,15 +163,19 @@ public class KuromojiPageOutput implements PageOutput
|
|
145
163
|
if (outputColumn.getType().equals(Types.STRING)) {
|
146
164
|
Joiner joiner = Joiner.on(MoreObjects.firstNonNull(setting.get("delimiter"), ",")).skipNulls();
|
147
165
|
builder.setString(outputColumn, joiner.join(outputs));
|
148
|
-
}
|
166
|
+
}
|
167
|
+
else if (outputColumn.getType().equals(Types.JSON)) {
|
149
168
|
builder.setJson(outputColumn, ValueFactory.newArray(outputs));
|
150
169
|
}
|
151
170
|
}
|
152
171
|
}
|
153
172
|
}
|
154
173
|
|
155
|
-
private boolean isOkPartsOfSpeech(Token token)
|
156
|
-
|
174
|
+
private boolean isOkPartsOfSpeech(Token token)
|
175
|
+
{
|
176
|
+
if (!task.getOkPartsOfSpeech().isPresent()) {
|
177
|
+
return true;
|
178
|
+
}
|
157
179
|
for (String okPartsOfSpeech : task.getOkPartsOfSpeech().get()) {
|
158
180
|
if (token.getAllFeaturesArray()[0].equals(okPartsOfSpeech)) {
|
159
181
|
return true;
|
@@ -9,9 +9,9 @@ import java.util.List;
|
|
9
9
|
import java.util.Map;
|
10
10
|
import java.util.Set;
|
11
11
|
|
12
|
+
import org.apache.lucene.analysis.CharArraySet;
|
12
13
|
import org.apache.lucene.analysis.TokenStream;
|
13
14
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
14
|
-
import org.apache.lucene.analysis.util.CharArraySet;
|
15
15
|
import org.codelibs.neologd.ipadic.lucene.analysis.ja.JapaneseAnalyzer;
|
16
16
|
import org.codelibs.neologd.ipadic.lucene.analysis.ja.JapaneseTokenizer;
|
17
17
|
import org.codelibs.neologd.ipadic.lucene.analysis.ja.JapaneseTokenizer.Mode;
|
@@ -39,7 +39,6 @@ import com.google.common.base.MoreObjects;
|
|
39
39
|
import com.google.common.collect.Lists;
|
40
40
|
import com.google.common.collect.Sets;
|
41
41
|
|
42
|
-
|
43
42
|
public class NeologdPageOutput implements PageOutput
|
44
43
|
{
|
45
44
|
private final KuromojiFilterPlugin.PluginTask task;
|
@@ -51,7 +50,8 @@ public class NeologdPageOutput implements PageOutput
|
|
51
50
|
private final JapaneseAnalyzer japaneseAnalyzer;
|
52
51
|
private static final Logger logger = Exec.getLogger(KuromojiFilterPlugin.class);
|
53
52
|
|
54
|
-
public NeologdPageOutput(TaskSource taskSource, Schema inputSchema, Schema outputSchema, PageOutput output)
|
53
|
+
public NeologdPageOutput(TaskSource taskSource, Schema inputSchema, Schema outputSchema, PageOutput output)
|
54
|
+
{
|
55
55
|
this.task = taskSource.loadTask(PluginTask.class);
|
56
56
|
this.inputSchema = inputSchema;
|
57
57
|
this.outputSchema = outputSchema;
|
@@ -69,7 +69,8 @@ public class NeologdPageOutput implements PageOutput
|
|
69
69
|
File file = new File(task.getDictionaryPath().get());
|
70
70
|
Reader reader = new InputStreamReader(new FileInputStream(file), Charsets.UTF_8);
|
71
71
|
userDict = UserDictionary.open(reader);
|
72
|
-
}
|
72
|
+
}
|
73
|
+
catch (Exception e) {
|
73
74
|
logger.error("neologd error", e);
|
74
75
|
}
|
75
76
|
}
|
@@ -77,9 +78,11 @@ public class NeologdPageOutput implements PageOutput
|
|
77
78
|
Mode mode = null;
|
78
79
|
if (task.getMode().equals("normal")) {
|
79
80
|
mode = JapaneseTokenizer.Mode.NORMAL;
|
80
|
-
}
|
81
|
+
}
|
82
|
+
else if (task.getMode().equals("search")) {
|
81
83
|
mode = JapaneseTokenizer.Mode.SEARCH;
|
82
|
-
}
|
84
|
+
}
|
85
|
+
else if (task.getMode().equals("extended")) {
|
83
86
|
mode = JapaneseTokenizer.Mode.EXTENDED;
|
84
87
|
}
|
85
88
|
|
@@ -93,17 +96,20 @@ public class NeologdPageOutput implements PageOutput
|
|
93
96
|
}
|
94
97
|
|
95
98
|
@Override
|
96
|
-
public void finish()
|
99
|
+
public void finish()
|
100
|
+
{
|
97
101
|
builder.finish();
|
98
102
|
}
|
99
103
|
|
100
104
|
@Override
|
101
|
-
public void close()
|
105
|
+
public void close()
|
106
|
+
{
|
102
107
|
builder.close();
|
103
108
|
}
|
104
109
|
|
105
110
|
@Override
|
106
|
-
public void add(Page page)
|
111
|
+
public void add(Page page)
|
112
|
+
{
|
107
113
|
reader.setPage(page);
|
108
114
|
while (reader.nextRecord()) {
|
109
115
|
setValue(builder);
|
@@ -114,7 +120,8 @@ public class NeologdPageOutput implements PageOutput
|
|
114
120
|
/**
|
115
121
|
* @param builder
|
116
122
|
*/
|
117
|
-
private void setValue(PageBuilder builder)
|
123
|
+
private void setValue(PageBuilder builder)
|
124
|
+
{
|
118
125
|
if (task.getKeepInput()) {
|
119
126
|
for (Column inputColumn : inputSchema.getColumns()) {
|
120
127
|
if (reader.isNull(inputColumn)) {
|
@@ -123,15 +130,20 @@ public class NeologdPageOutput implements PageOutput
|
|
123
130
|
}
|
124
131
|
if (Types.STRING.equals(inputColumn.getType())) {
|
125
132
|
builder.setString(inputColumn, reader.getString(inputColumn));
|
126
|
-
}
|
133
|
+
}
|
134
|
+
else if (Types.BOOLEAN.equals(inputColumn.getType())) {
|
127
135
|
builder.setBoolean(inputColumn, reader.getBoolean(inputColumn));
|
128
|
-
}
|
136
|
+
}
|
137
|
+
else if (Types.DOUBLE.equals(inputColumn.getType())) {
|
129
138
|
builder.setDouble(inputColumn, reader.getDouble(inputColumn));
|
130
|
-
}
|
139
|
+
}
|
140
|
+
else if (Types.LONG.equals(inputColumn.getType())) {
|
131
141
|
builder.setLong(inputColumn, reader.getLong(inputColumn));
|
132
|
-
}
|
142
|
+
}
|
143
|
+
else if (Types.TIMESTAMP.equals(inputColumn.getType())) {
|
133
144
|
builder.setTimestamp(inputColumn, reader.getTimestamp(inputColumn));
|
134
|
-
}
|
145
|
+
}
|
146
|
+
else if (Types.JSON.equals(inputColumn.getType())) {
|
135
147
|
builder.setJson(inputColumn, reader.getJson(inputColumn));
|
136
148
|
}
|
137
149
|
}
|
@@ -149,9 +161,11 @@ public class NeologdPageOutput implements PageOutput
|
|
149
161
|
String word = null;
|
150
162
|
if ("base_form".equals(method)) {
|
151
163
|
word = token.getBaseForm();
|
152
|
-
}
|
164
|
+
}
|
165
|
+
else if ("reading".equals(method)) {
|
153
166
|
word = token.getReading();
|
154
|
-
}
|
167
|
+
}
|
168
|
+
else if ("surface_form".equals(method)) {
|
155
169
|
word = token.getCharTerm();
|
156
170
|
}
|
157
171
|
if (word != null) {
|
@@ -161,16 +175,20 @@ public class NeologdPageOutput implements PageOutput
|
|
161
175
|
if (outputColumn.getType().equals(Types.STRING)) {
|
162
176
|
Joiner joiner = Joiner.on(MoreObjects.firstNonNull(setting.get("delimiter"), ",")).skipNulls();
|
163
177
|
builder.setString(outputColumn, joiner.join(outputs));
|
164
|
-
}
|
178
|
+
}
|
179
|
+
else if (outputColumn.getType().equals(Types.JSON)) {
|
165
180
|
builder.setJson(outputColumn, ValueFactory.newArray(outputs));
|
166
181
|
}
|
167
182
|
}
|
168
183
|
}
|
169
184
|
}
|
170
185
|
|
171
|
-
private boolean isOkPartsOfSpeech(Token token)
|
186
|
+
private boolean isOkPartsOfSpeech(Token token)
|
187
|
+
{
|
172
188
|
logger.debug("{} => {}", token.getCharTerm(), token.getPartOfSpeech());
|
173
|
-
if (!task.getOkPartsOfSpeech().isPresent()) {
|
189
|
+
if (!task.getOkPartsOfSpeech().isPresent()) {
|
190
|
+
return true;
|
191
|
+
}
|
174
192
|
for (String okPartsOfSpeech : task.getOkPartsOfSpeech().get()) {
|
175
193
|
if (token.getPartOfSpeech().startsWith(okPartsOfSpeech)) {
|
176
194
|
return true;
|
@@ -179,9 +197,10 @@ public class NeologdPageOutput implements PageOutput
|
|
179
197
|
return false;
|
180
198
|
}
|
181
199
|
|
182
|
-
private List<Token> tokenize(Reader reader)
|
200
|
+
private List<Token> tokenize(Reader reader)
|
201
|
+
{
|
183
202
|
List<Token> list = Lists.newArrayList();
|
184
|
-
try (TokenStream tokenStream = japaneseAnalyzer.tokenStream("", reader)
|
203
|
+
try (TokenStream tokenStream = japaneseAnalyzer.tokenStream("", reader)) {
|
185
204
|
BaseFormAttribute baseAttr = tokenStream.addAttribute(BaseFormAttribute.class);
|
186
205
|
CharTermAttribute charAttr = tokenStream.addAttribute(CharTermAttribute.class);
|
187
206
|
PartOfSpeechAttribute posAttr = tokenStream.addAttribute(PartOfSpeechAttribute.class);
|
@@ -194,10 +213,13 @@ public class NeologdPageOutput implements PageOutput
|
|
194
213
|
token.setBaseForm(baseAttr.getBaseForm());
|
195
214
|
token.setReading(readAttr.getReading());
|
196
215
|
token.setPartOfSpeech(posAttr.getPartOfSpeech());
|
197
|
-
if (!isOkPartsOfSpeech(token)) {
|
216
|
+
if (!isOkPartsOfSpeech(token)) {
|
217
|
+
continue;
|
218
|
+
}
|
198
219
|
list.add(token);
|
199
220
|
}
|
200
|
-
}
|
221
|
+
}
|
222
|
+
catch (Exception e) {
|
201
223
|
logger.error("neologd error", e);
|
202
224
|
}
|
203
225
|
return list;
|
@@ -8,34 +8,44 @@ public class Token
|
|
8
8
|
private String reading;
|
9
9
|
private String inflection;
|
10
10
|
|
11
|
-
public String getCharTerm()
|
11
|
+
public String getCharTerm()
|
12
|
+
{
|
12
13
|
return charTerm;
|
13
14
|
}
|
14
|
-
public String getBaseForm()
|
15
|
+
public String getBaseForm()
|
16
|
+
{
|
15
17
|
return baseForm;
|
16
18
|
}
|
17
|
-
public String getPartOfSpeech()
|
19
|
+
public String getPartOfSpeech()
|
20
|
+
{
|
18
21
|
return partOfSpeech;
|
19
22
|
}
|
20
|
-
public void setCharTerm(String charTerm)
|
23
|
+
public void setCharTerm(String charTerm)
|
24
|
+
{
|
21
25
|
this.charTerm = charTerm;
|
22
26
|
}
|
23
|
-
public void setBaseForm(String baseForm)
|
27
|
+
public void setBaseForm(String baseForm)
|
28
|
+
{
|
24
29
|
this.baseForm = baseForm;
|
25
30
|
}
|
26
|
-
public void setPartOfSpeech(String partOfSpeech)
|
31
|
+
public void setPartOfSpeech(String partOfSpeech)
|
32
|
+
{
|
27
33
|
this.partOfSpeech = partOfSpeech;
|
28
34
|
}
|
29
|
-
public void setReading(String reading)
|
35
|
+
public void setReading(String reading)
|
36
|
+
{
|
30
37
|
this.reading = reading;
|
31
38
|
}
|
32
|
-
public String getReading()
|
39
|
+
public String getReading()
|
40
|
+
{
|
33
41
|
return reading;
|
34
42
|
}
|
35
|
-
public String getInflection()
|
43
|
+
public String getInflection()
|
44
|
+
{
|
36
45
|
return inflection;
|
37
46
|
}
|
38
|
-
public void setInflection(String inflection)
|
47
|
+
public void setInflection(String inflection)
|
48
|
+
{
|
39
49
|
this.inflection = inflection;
|
40
50
|
}
|
41
51
|
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-filter-kuromoji
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- toyama0919
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-12-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -61,12 +61,12 @@ files:
|
|
61
61
|
- src/main/java/org/embulk/filter/kuromoji/NeologdPageOutput.java
|
62
62
|
- src/main/java/org/embulk/filter/kuromoji/Token.java
|
63
63
|
- src/test/java/org/embulk/filter/kuromoji/TestKuromojiFilterPlugin.java
|
64
|
-
- classpath/embulk-filter-kuromoji-0.4.0.jar
|
64
|
+
- classpath/embulk-filter-kuromoji-0.5.0.jar
|
65
65
|
- classpath/kuromoji-core-0.9.0.jar
|
66
66
|
- classpath/kuromoji-ipadic-0.9.0.jar
|
67
|
-
- classpath/lucene-analyzers-common-
|
68
|
-
- classpath/lucene-analyzers-kuromoji-ipadic-neologd-
|
69
|
-
- classpath/lucene-core-
|
67
|
+
- classpath/lucene-analyzers-common-6.2.1.jar
|
68
|
+
- classpath/lucene-analyzers-kuromoji-ipadic-neologd-6.2.1-20161201.jar
|
69
|
+
- classpath/lucene-core-6.2.1.jar
|
70
70
|
homepage: https://github.com/toyama0919/embulk-filter-kuromoji
|
71
71
|
licenses:
|
72
72
|
- MIT
|
@@ -92,4 +92,3 @@ signing_key:
|
|
92
92
|
specification_version: 4
|
93
93
|
summary: Kuromoji filter plugin for Embulk. Neologd support.
|
94
94
|
test_files: []
|
95
|
-
has_rdoc:
|