logstash-filter-languagedetect 0.0.1.pre.alpha.pre.12 → 0.1.0.alpha.15
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/Gemfile +1 -2
- data/lib/logstash/filters/languagedetect.rb +37 -12
- data/logstash-filter-languagedetect.gemspec +2 -2
- data/spec/filters/languagedetect_spec.rb +42 -9
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NTgxOTYxMjU0ZTRhNzUyMDY0MzI0NjFhODFlNGE1YmI0OWJiM2NhMw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MWQ3ZmI3ZTBmNjg5YTEwYjZhOTRhMTU3ZTllN2Y1OGMxZTFlYzk0MA==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NjI0NjBhODQyYzgyODYzZmJhZTMyZjc4NGE1YjQ2NDNhYTEyOWE2MTRiNmZl
|
10
|
+
NDZiMDhiZjRjZDJmNmZiNjM2YTU3ZDBhYTFiNGZlNzE3MDQ1OGZlMzE1NGJj
|
11
|
+
NzMwNzEwNGNhODZmNjI2NWNiNzdhMDhlMzZkZmQ1MWZiYzliZDE=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
MzZhZTU4MzM0ZTlhZGQxNWZkYjE5OWQ2ZDU3MTcwODgzM2EyNzkwZDFlZjA1
|
14
|
+
MGEzNjdiODVhZjk2ODc0OGQ4YTQyN2QyYTcyYzE5OWFkZDYyMDZhNWIzZGQ4
|
15
|
+
MDNiMWQ5MTRmNmJkNWVkZmVmNWVmYmI4MTNjNGRlMDcwYjlkZjQ=
|
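data/Gemfile
CHANGED
(hunk for data/Gemfile is missing from this capture)
data/lib/logstash/filters/languagedetect.rb
CHANGED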
@@ -1,31 +1,58 @@
|
|
1
|
-
# encoding: utf-8
|
2
1
|
require "logstash/filters/base"
|
3
2
|
require "logstash/namespace"
|
4
3
|
require 'whatlanguage'
|
5
|
-
# require 'whatlanguage/string'
|
6
4
|
|
7
|
-
# This
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# It is only intended to be used as an example.
|
5
|
+
# This filter will try to determine the language of the field given
|
6
|
+
# by the source parameter and output the result in either the language
|
7
|
+
# name (in English lowercase) or iso format
|
11
8
|
class LogStash::Filters::Languagedetect < LogStash::Filters::Base
|
12
9
|
|
13
10
|
# Setting the config_name here is required. This is how you
|
14
11
|
# configure this filter from your Logstash config.
|
15
12
|
#
|
16
13
|
# filter {
|
17
|
-
#
|
18
|
-
#
|
14
|
+
# languagedetect {
|
15
|
+
# source => "message"
|
16
|
+
# target => "lang"
|
17
|
+
# useiso => true
|
19
18
|
# }
|
20
19
|
# }
|
21
20
|
#
|
22
21
|
config_name "languagedetect"
|
23
22
|
|
24
|
-
#
|
23
|
+
# Set the source field which is used for the language check.
|
24
|
+
#
|
25
|
+
# Example:
|
26
|
+
# [source,ruby]
|
27
|
+
# filter {
|
28
|
+
# languagedetect {
|
29
|
+
# # tries to determine the language in "customfield"
|
30
|
+
# source => "customfield"
|
31
|
+
# }
|
32
|
+
# }
|
25
33
|
config :source, :validate => :string, :default => "message"
|
26
34
|
|
35
|
+
# Set the result field in which the result will be stored.
|
36
|
+
#
|
37
|
+
# Example:
|
38
|
+
# [source,ruby]
|
39
|
+
# filter {
|
40
|
+
# languagedetect {
|
41
|
+
# # outputs the result into "customoutput"
|
42
|
+
# target => "customoutput"
|
43
|
+
# }
|
44
|
+
# }
|
27
45
|
config :target, :validate => :string, :default => "lang"
|
28
46
|
|
47
|
+
# If useiso is true, result will be "en" instead of "english"
|
48
|
+
#
|
49
|
+
# Example:
|
50
|
+
# [source,ruby]
|
51
|
+
# filter {
|
52
|
+
# languagedetect {
|
53
|
+
# useiso => true
|
54
|
+
# }
|
55
|
+
# }
|
29
56
|
config :useiso, :validate => :boolean, :default => false
|
30
57
|
|
31
58
|
|
@@ -40,13 +67,11 @@ class LogStash::Filters::Languagedetect < LogStash::Filters::Base
|
|
40
67
|
text_to_detect = event[@source]
|
41
68
|
if @useiso
|
42
69
|
lang_found = @wl.language_iso(text_to_detect)
|
43
|
-
# lang_found = text_to_detect.language_iso
|
44
70
|
else
|
45
71
|
lang_found = @wl.language(text_to_detect)
|
46
|
-
# lang_found = text_to_detect.language
|
47
72
|
end
|
48
73
|
|
49
|
-
event[@target] = lang_found
|
74
|
+
event[@target] = lang_found.to_s.force_encoding(Encoding::UTF_8)
|
50
75
|
|
51
76
|
# filter_matched should go in the last line of our successful code
|
52
77
|
filter_matched(event)
|
data/logstash-filter-languagedetect.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'logstash-filter-languagedetect'
|
3
|
-
s.version = '0.0
|
4
|
-
s.version = "#{s.version}
|
3
|
+
s.version = '0.1.0'
|
4
|
+
s.version = "#{s.version}.alpha.#{ENV['TRAVIS_BUILD_NUMBER']}" if ENV['TRAVIS']
|
5
5
|
s.licenses = ['Apache License (2.0)']
|
6
6
|
s.summary = "This example filter replaces the contents of the message field with the specified value."
|
7
7
|
s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
|
data/spec/filters/languagedetect_spec.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require File.absolute_path(File.join(File.dirname(__FILE__), '../../spec/spec_helper'))
|
3
3
|
require File.absolute_path(File.join(File.dirname(__FILE__), '../../lib/logstash/filters/languagedetect'))
|
4
|
-
# require "lib/logstash/filters/languagedetect"
|
5
4
|
|
6
5
|
describe LogStash::Filters::Languagedetect do
|
7
6
|
describe "with defaults and non iso languages" do
|
@@ -14,22 +13,22 @@ describe LogStash::Filters::Languagedetect do
|
|
14
13
|
|
15
14
|
sample("message" => "The links between the attempted car bombings in Glasgow and London are becoming clearer") do
|
16
15
|
expect(subject).to include('lang')
|
17
|
-
expect(subject['lang']).to eq(
|
16
|
+
expect(subject['lang']).to eq("english")
|
18
17
|
end
|
19
18
|
|
20
19
|
sample("message" => "dies ist ein deutscher Text") do
|
21
20
|
expect(subject).to include('lang')
|
22
|
-
expect(subject['lang']).to eq(
|
21
|
+
expect(subject['lang']).to eq("german")
|
23
22
|
end
|
24
23
|
|
25
24
|
sample("message" => "Deux autres personnes ont été arrêtées durant la nuit") do
|
26
25
|
expect(subject).to include('lang')
|
27
|
-
expect(subject['lang']).to eq(
|
26
|
+
expect(subject['lang']).to eq("french")
|
28
27
|
end
|
29
28
|
|
30
29
|
sample("message" => "En estado de máxima alertaen su nivel de crítico") do
|
31
30
|
expect(subject).to include('lang')
|
32
|
-
expect(subject['lang']).to eq(
|
31
|
+
expect(subject['lang']).to eq("spanish")
|
33
32
|
end
|
34
33
|
end
|
35
34
|
|
@@ -45,22 +44,56 @@ describe LogStash::Filters::Languagedetect do
|
|
45
44
|
|
46
45
|
sample("message" => "The links between the attempted car bombings in Glasgow and London are becoming clearer") do
|
47
46
|
expect(subject).to include('lang')
|
48
|
-
expect(subject['lang']).to eq(
|
47
|
+
expect(subject['lang']).to eq("en")
|
49
48
|
end
|
50
49
|
|
51
50
|
sample("message" => "dies ist ein deutscher Text") do
|
52
51
|
expect(subject).to include('lang')
|
53
|
-
expect(subject['lang']).to eq(
|
52
|
+
expect(subject['lang']).to eq("de")
|
54
53
|
end
|
55
54
|
|
56
55
|
sample("message" => "Deux autres personnes ont été arrêtées durant la nuit") do
|
57
56
|
expect(subject).to include('lang')
|
58
|
-
expect(subject['lang']).to eq(
|
57
|
+
expect(subject['lang']).to eq("fr")
|
59
58
|
end
|
60
59
|
|
61
60
|
sample("message" => "En estado de máxima alertaen su nivel de crítico") do
|
62
61
|
expect(subject).to include('lang')
|
63
|
-
expect(subject['lang']).to eq(
|
62
|
+
expect(subject['lang']).to eq("es")
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
describe "non default target" do
|
67
|
+
let(:config) do <<-CONFIG
|
68
|
+
filter {
|
69
|
+
languagedetect {
|
70
|
+
useiso => true
|
71
|
+
target => 'test'
|
72
|
+
}
|
73
|
+
}
|
74
|
+
CONFIG
|
75
|
+
end
|
76
|
+
|
77
|
+
sample("message" => "The links between the attempted car bombings in Glasgow and London are becoming clearer") do
|
78
|
+
expect(subject).to include('test')
|
79
|
+
expect(subject['test']).to eq("en")
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
describe "non default source" do
|
84
|
+
let(:config) do <<-CONFIG
|
85
|
+
filter {
|
86
|
+
languagedetect {
|
87
|
+
useiso => true
|
88
|
+
source => 'test'
|
89
|
+
}
|
90
|
+
}
|
91
|
+
CONFIG
|
92
|
+
end
|
93
|
+
|
94
|
+
sample("test" => "The links between the attempted car bombings in Glasgow and London are becoming clearer") do
|
95
|
+
expect(subject).to include('lang')
|
96
|
+
expect(subject['lang']).to eq("en")
|
64
97
|
end
|
65
98
|
end
|
66
99
|
end
|