logstash-filter-languagedetect 0.0.1.pre.alpha.pre.12 → 0.1.0.alpha.15
This diff shows the changes between two publicly released versions of a package, as those versions appear in the supported public registry they were published to. It is provided for informational purposes only.
- checksums.yaml +8 -8
- data/Gemfile +1 -2
- data/lib/logstash/filters/languagedetect.rb +37 -12
- data/logstash-filter-languagedetect.gemspec +2 -2
- data/spec/filters/languagedetect_spec.rb +42 -9
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NTgxOTYxMjU0ZTRhNzUyMDY0MzI0NjFhODFlNGE1YmI0OWJiM2NhMw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MWQ3ZmI3ZTBmNjg5YTEwYjZhOTRhMTU3ZTllN2Y1OGMxZTFlYzk0MA==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NjI0NjBhODQyYzgyODYzZmJhZTMyZjc4NGE1YjQ2NDNhYTEyOWE2MTRiNmZl
|
10
|
+
NDZiMDhiZjRjZDJmNmZiNjM2YTU3ZDBhYTFiNGZlNzE3MDQ1OGZlMzE1NGJj
|
11
|
+
NzMwNzEwNGNhODZmNjI2NWNiNzdhMDhlMzZkZmQ1MWZiYzliZDE=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
MzZhZTU4MzM0ZTlhZGQxNWZkYjE5OWQ2ZDU3MTcwODgzM2EyNzkwZDFlZjA1
|
14
|
+
MGEzNjdiODVhZjk2ODc0OGQ4YTQyN2QyYTcyYzE5OWFkZDYyMDZhNWIzZGQ4
|
15
|
+
MDNiMWQ5MTRmNmJkNWVkZmVmNWVmYmI4MTNjNGRlMDcwYjlkZjQ=
|
data/Gemfile
CHANGED
@@ -1,31 +1,58 @@
|
|
1
|
-
# encoding: utf-8
|
2
1
|
require "logstash/filters/base"
|
3
2
|
require "logstash/namespace"
|
4
3
|
require 'whatlanguage'
|
5
|
-
# require 'whatlanguage/string'
|
6
4
|
|
7
|
-
# This
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# It is only intended to be used as an example.
|
5
|
+
# This filter will try to determine the language of the field given
|
6
|
+
# by the source parameter and output the result in either the language
|
7
|
+
# name (in lowercase English, e.g. "english") or as an ISO code (e.g. "en").
|
11
8
|
class LogStash::Filters::Languagedetect < LogStash::Filters::Base
|
12
9
|
|
13
10
|
# Setting the config_name here is required. This is how you
|
14
11
|
# configure this filter from your Logstash config.
|
15
12
|
#
|
16
13
|
# filter {
|
17
|
-
#
|
18
|
-
#
|
14
|
+
# languagedetect {
|
15
|
+
# source => "message"
|
16
|
+
# target => "lang"
|
17
|
+
# useiso => true
|
19
18
|
# }
|
20
19
|
# }
|
21
20
|
#
|
22
21
|
config_name "languagedetect"
|
23
22
|
|
24
|
-
#
|
23
|
+
# Set the source field which is used for the language check.
|
24
|
+
#
|
25
|
+
# Example:
|
26
|
+
# [source,ruby]
|
27
|
+
# filter {
|
28
|
+
# languagedetect {
|
29
|
+
# # tries to determine the language in "customfield"
|
30
|
+
# source => "customfield"
|
31
|
+
# }
|
32
|
+
# }
|
25
33
|
config :source, :validate => :string, :default => "message"
|
26
34
|
|
35
|
+
# Set the target field in which the detection result will be stored.
|
36
|
+
#
|
37
|
+
# Example:
|
38
|
+
# [source,ruby]
|
39
|
+
# filter {
|
40
|
+
# languagedetect {
|
41
|
+
# # outputs the result into "customoutput"
|
42
|
+
# target => "customoutput"
|
43
|
+
# }
|
44
|
+
# }
|
27
45
|
config :target, :validate => :string, :default => "lang"
|
28
46
|
|
47
|
+
# If useiso is true, result will be "en" instead of "english"
|
48
|
+
#
|
49
|
+
# Example:
|
50
|
+
# [source,ruby]
|
51
|
+
# filter {
|
52
|
+
# languagedetect {
|
53
|
+
# useiso => true
|
54
|
+
# }
|
55
|
+
# }
|
29
56
|
config :useiso, :validate => :boolean, :default => false
|
30
57
|
|
31
58
|
|
@@ -40,13 +67,11 @@ class LogStash::Filters::Languagedetect < LogStash::Filters::Base
|
|
40
67
|
text_to_detect = event[@source]
|
41
68
|
if @useiso
|
42
69
|
lang_found = @wl.language_iso(text_to_detect)
|
43
|
-
# lang_found = text_to_detect.language_iso
|
44
70
|
else
|
45
71
|
lang_found = @wl.language(text_to_detect)
|
46
|
-
# lang_found = text_to_detect.language
|
47
72
|
end
|
48
73
|
|
49
|
-
event[@target] = lang_found
|
74
|
+
event[@target] = lang_found.to_s.force_encoding(Encoding::UTF_8)
|
50
75
|
|
51
76
|
# filter_matched should go in the last line of our successful code
|
52
77
|
filter_matched(event)
|
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'logstash-filter-languagedetect'
|
3
|
-
s.version = '0.0
|
4
|
-
s.version = "#{s.version}
|
3
|
+
s.version = '0.1.0'
|
4
|
+
s.version = "#{s.version}.alpha.#{ENV['TRAVIS_BUILD_NUMBER']}" if ENV['TRAVIS']
|
5
5
|
s.licenses = ['Apache License (2.0)']
|
6
6
|
s.summary = "This example filter replaces the contents of the message field with the specified value."
|
7
7
|
s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
|
@@ -1,7 +1,6 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require File.absolute_path(File.join(File.dirname(__FILE__), '../../spec/spec_helper'))
|
3
3
|
require File.absolute_path(File.join(File.dirname(__FILE__), '../../lib/logstash/filters/languagedetect'))
|
4
|
-
# require "lib/logstash/filters/languagedetect"
|
5
4
|
|
6
5
|
describe LogStash::Filters::Languagedetect do
|
7
6
|
describe "with defaults and non iso languages" do
|
@@ -14,22 +13,22 @@ describe LogStash::Filters::Languagedetect do
|
|
14
13
|
|
15
14
|
sample("message" => "The links between the attempted car bombings in Glasgow and London are becoming clearer") do
|
16
15
|
expect(subject).to include('lang')
|
17
|
-
expect(subject['lang']).to eq(
|
16
|
+
expect(subject['lang']).to eq("english")
|
18
17
|
end
|
19
18
|
|
20
19
|
sample("message" => "dies ist ein deutscher Text") do
|
21
20
|
expect(subject).to include('lang')
|
22
|
-
expect(subject['lang']).to eq(
|
21
|
+
expect(subject['lang']).to eq("german")
|
23
22
|
end
|
24
23
|
|
25
24
|
sample("message" => "Deux autres personnes ont été arrêtées durant la nuit") do
|
26
25
|
expect(subject).to include('lang')
|
27
|
-
expect(subject['lang']).to eq(
|
26
|
+
expect(subject['lang']).to eq("french")
|
28
27
|
end
|
29
28
|
|
30
29
|
sample("message" => "En estado de máxima alertaen su nivel de crítico") do
|
31
30
|
expect(subject).to include('lang')
|
32
|
-
expect(subject['lang']).to eq(
|
31
|
+
expect(subject['lang']).to eq("spanish")
|
33
32
|
end
|
34
33
|
end
|
35
34
|
|
@@ -45,22 +44,56 @@ describe LogStash::Filters::Languagedetect do
|
|
45
44
|
|
46
45
|
sample("message" => "The links between the attempted car bombings in Glasgow and London are becoming clearer") do
|
47
46
|
expect(subject).to include('lang')
|
48
|
-
expect(subject['lang']).to eq(
|
47
|
+
expect(subject['lang']).to eq("en")
|
49
48
|
end
|
50
49
|
|
51
50
|
sample("message" => "dies ist ein deutscher Text") do
|
52
51
|
expect(subject).to include('lang')
|
53
|
-
expect(subject['lang']).to eq(
|
52
|
+
expect(subject['lang']).to eq("de")
|
54
53
|
end
|
55
54
|
|
56
55
|
sample("message" => "Deux autres personnes ont été arrêtées durant la nuit") do
|
57
56
|
expect(subject).to include('lang')
|
58
|
-
expect(subject['lang']).to eq(
|
57
|
+
expect(subject['lang']).to eq("fr")
|
59
58
|
end
|
60
59
|
|
61
60
|
sample("message" => "En estado de máxima alertaen su nivel de crítico") do
|
62
61
|
expect(subject).to include('lang')
|
63
|
-
expect(subject['lang']).to eq(
|
62
|
+
expect(subject['lang']).to eq("es")
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
describe "non default target" do
|
67
|
+
let(:config) do <<-CONFIG
|
68
|
+
filter {
|
69
|
+
languagedetect {
|
70
|
+
useiso => true
|
71
|
+
target => 'test'
|
72
|
+
}
|
73
|
+
}
|
74
|
+
CONFIG
|
75
|
+
end
|
76
|
+
|
77
|
+
sample("message" => "The links between the attempted car bombings in Glasgow and London are becoming clearer") do
|
78
|
+
expect(subject).to include('test')
|
79
|
+
expect(subject['test']).to eq("en")
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
describe "non default source" do
|
84
|
+
let(:config) do <<-CONFIG
|
85
|
+
filter {
|
86
|
+
languagedetect {
|
87
|
+
useiso => true
|
88
|
+
source => 'test'
|
89
|
+
}
|
90
|
+
}
|
91
|
+
CONFIG
|
92
|
+
end
|
93
|
+
|
94
|
+
sample("test" => "The links between the attempted car bombings in Glasgow and London are becoming clearer") do
|
95
|
+
expect(subject).to include('lang')
|
96
|
+
expect(subject['lang']).to eq("en")
|
64
97
|
end
|
65
98
|
end
|
66
99
|
end
|