logstash-filter-languagedetect 0.0.1.pre.alpha.pre.12 → 0.1.0.alpha.15

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- MzdmZWM0OTlhZThlY2Q5MmQ2YmUyNzM1OTcwNWZkMzRkMzg2NTVjNQ==
4
+ NTgxOTYxMjU0ZTRhNzUyMDY0MzI0NjFhODFlNGE1YmI0OWJiM2NhMw==
5
5
  data.tar.gz: !binary |-
6
- OTc2NTRlM2Q5NjlkMTVkZTBiZGRkYmM2NWM4YWRkZjZkMWIzMmIwMg==
6
+ MWQ3ZmI3ZTBmNjg5YTEwYjZhOTRhMTU3ZTllN2Y1OGMxZTFlYzk0MA==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- ODQ5MGYzNTBkZmVmMmRlMjhkMzFjYmJlZWVjYTk4YmM0ZjUyMmEyOTk0NTk3
10
- ZjMyYzRlNWEyMWQ3OGViYjViMzUzYjk5NTliMWYzMzliYTljZTE1MjZmMzYw
11
- NDRhMGQyMDZmMGI2ZGQ2MzYxYzk1OTRiMTMwN2E4NTQzMjYxYjI=
9
+ NjI0NjBhODQyYzgyODYzZmJhZTMyZjc4NGE1YjQ2NDNhYTEyOWE2MTRiNmZl
10
+ NDZiMDhiZjRjZDJmNmZiNjM2YTU3ZDBhYTFiNGZlNzE3MDQ1OGZlMzE1NGJj
11
+ NzMwNzEwNGNhODZmNjI2NWNiNzdhMDhlMzZkZmQ1MWZiYzliZDE=
12
12
  data.tar.gz: !binary |-
13
- YzJhMzc1MDMyYjE5NDJjMDhkNjQ4ZDIzODgyMDYyNTg3OTc3MTcxYzUxNzkx
14
- MmYwNWZlOTE4Y2QyZjQ0NzgyZmU2ZjBmNTQ2ZGMyN2EzYTk5MWI1Zjc4NjQw
15
- MzBmNTNlYzQ5NDJjYTU1MGQ3NGFmZmFmZmViZDAzNTNmMDcwNDg=
13
+ MzZhZTU4MzM0ZTlhZGQxNWZkYjE5OWQ2ZDU3MTcwODgzM2EyNzkwZDFlZjA1
14
+ MGEzNjdiODVhZjk2ODc0OGQ4YTQyN2QyYTcyYzE5OWFkZDYyMDZhNWIzZGQ4
15
+ MDNiMWQ5MTRmNmJkNWVkZmVmNWVmYmI4MTNjNGRlMDcwYjlkZjQ=
data/Gemfile CHANGED
@@ -1,4 +1,3 @@
1
1
  source 'https://rubygems.org'
2
-
2
+ gemspec
3
3
  gem 'whatlanguage', :git => "https://github.com/peterc/whatlanguage.git", :ref => "0192301022"
4
- gemspec
@@ -1,31 +1,58 @@
1
- # encoding: utf-8
2
1
  require "logstash/filters/base"
3
2
  require "logstash/namespace"
4
3
  require 'whatlanguage'
5
- # require 'whatlanguage/string'
6
4
 
7
- # This example filter will replace the contents of the default
8
- # message field with whatever you specify in the configuration.
9
- #
10
- # It is only intended to be used as an example.
5
+ # This filter will try to determine the language of the field given
6
+ # by the source parameter and output the result in either the language
7
+ # name (in English lowercase) or iso format
11
8
  class LogStash::Filters::Languagedetect < LogStash::Filters::Base
12
9
 
13
10
  # Setting the config_name here is required. This is how you
14
11
  # configure this filter from your Logstash config.
15
12
  #
16
13
  # filter {
17
- # example {
18
- # message => "My message..."
14
+ # languagedetect {
15
+ # source => "message"
16
+ # target => "lang"
17
+ # useiso => true
19
18
  # }
20
19
  # }
21
20
  #
22
21
  config_name "languagedetect"
23
22
 
24
- # Replace the message with this value.
23
+ # Set the source field which is used for the language check.
24
+ #
25
+ # Example:
26
+ # [source,ruby]
27
+ # filter {
28
+ # languagedetect {
29
+ # # tries to determine the language in "customfield"
30
+ # source => "customfield"
31
+ # }
32
+ # }
25
33
  config :source, :validate => :string, :default => "message"
26
34
 
35
+ # Set the result field in which the result will be stored.
36
+ #
37
+ # Example:
38
+ # [source,ruby]
39
+ # filter {
40
+ # languagedetect {
41
+ # # outputs the result into "customoutput"
42
+ # target => "customoutput"
43
+ # }
44
+ # }
27
45
  config :target, :validate => :string, :default => "lang"
28
46
 
47
+ # If useiso is true, result will be "en" instead of "english"
48
+ #
49
+ # Example:
50
+ # [source,ruby]
51
+ # filter {
52
+ # languagedetect {
53
+ # useiso => true
54
+ # }
55
+ # }
29
56
  config :useiso, :validate => :boolean, :default => false
30
57
 
31
58
 
@@ -40,13 +67,11 @@ class LogStash::Filters::Languagedetect < LogStash::Filters::Base
40
67
  text_to_detect = event[@source]
41
68
  if @useiso
42
69
  lang_found = @wl.language_iso(text_to_detect)
43
- # lang_found = text_to_detect.language_iso
44
70
  else
45
71
  lang_found = @wl.language(text_to_detect)
46
- # lang_found = text_to_detect.language
47
72
  end
48
73
 
49
- event[@target] = lang_found
74
+ event[@target] = lang_found.to_s.force_encoding(Encoding::UTF_8)
50
75
 
51
76
  # filter_matched should go in the last line of our successful code
52
77
  filter_matched(event)
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-filter-languagedetect'
3
- s.version = '0.0.1'
4
- s.version = "#{s.version}-alpha-#{ENV['TRAVIS_BUILD_NUMBER']}" if ENV['TRAVIS']
3
+ s.version = '0.1.0'
4
+ s.version = "#{s.version}.alpha.#{ENV['TRAVIS_BUILD_NUMBER']}" if ENV['TRAVIS']
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "This example filter replaces the contents of the message field with the specified value."
7
7
  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
@@ -1,7 +1,6 @@
1
1
  # encoding: utf-8
2
2
  require File.absolute_path(File.join(File.dirname(__FILE__), '../../spec/spec_helper'))
3
3
  require File.absolute_path(File.join(File.dirname(__FILE__), '../../lib/logstash/filters/languagedetect'))
4
- # require "lib/logstash/filters/languagedetect"
5
4
 
6
5
  describe LogStash::Filters::Languagedetect do
7
6
  describe "with defaults and non iso languages" do
@@ -14,22 +13,22 @@ describe LogStash::Filters::Languagedetect do
14
13
 
15
14
  sample("message" => "The links between the attempted car bombings in Glasgow and London are becoming clearer") do
16
15
  expect(subject).to include('lang')
17
- expect(subject['lang']).to eq(:english)
16
+ expect(subject['lang']).to eq("english")
18
17
  end
19
18
 
20
19
  sample("message" => "dies ist ein deutscher Text") do
21
20
  expect(subject).to include('lang')
22
- expect(subject['lang']).to eq(:german)
21
+ expect(subject['lang']).to eq("german")
23
22
  end
24
23
 
25
24
  sample("message" => "Deux autres personnes ont été arrêtées durant la nuit") do
26
25
  expect(subject).to include('lang')
27
- expect(subject['lang']).to eq(:french)
26
+ expect(subject['lang']).to eq("french")
28
27
  end
29
28
 
30
29
  sample("message" => "En estado de máxima alertaen su nivel de crítico") do
31
30
  expect(subject).to include('lang')
32
- expect(subject['lang']).to eq(:spanish)
31
+ expect(subject['lang']).to eq("spanish")
33
32
  end
34
33
  end
35
34
 
@@ -45,22 +44,56 @@ describe LogStash::Filters::Languagedetect do
45
44
 
46
45
  sample("message" => "The links between the attempted car bombings in Glasgow and London are becoming clearer") do
47
46
  expect(subject).to include('lang')
48
- expect(subject['lang']).to eq(:en)
47
+ expect(subject['lang']).to eq("en")
49
48
  end
50
49
 
51
50
  sample("message" => "dies ist ein deutscher Text") do
52
51
  expect(subject).to include('lang')
53
- expect(subject['lang']).to eq(:de)
52
+ expect(subject['lang']).to eq("de")
54
53
  end
55
54
 
56
55
  sample("message" => "Deux autres personnes ont été arrêtées durant la nuit") do
57
56
  expect(subject).to include('lang')
58
- expect(subject['lang']).to eq(:fr)
57
+ expect(subject['lang']).to eq("fr")
59
58
  end
60
59
 
61
60
  sample("message" => "En estado de máxima alertaen su nivel de crítico") do
62
61
  expect(subject).to include('lang')
63
- expect(subject['lang']).to eq(:es)
62
+ expect(subject['lang']).to eq("es")
63
+ end
64
+ end
65
+
66
+ describe "non default target" do
67
+ let(:config) do <<-CONFIG
68
+ filter {
69
+ languagedetect {
70
+ useiso => true
71
+ target => 'test'
72
+ }
73
+ }
74
+ CONFIG
75
+ end
76
+
77
+ sample("message" => "The links between the attempted car bombings in Glasgow and London are becoming clearer") do
78
+ expect(subject).to include('test')
79
+ expect(subject['test']).to eq("en")
80
+ end
81
+ end
82
+
83
+ describe "non default source" do
84
+ let(:config) do <<-CONFIG
85
+ filter {
86
+ languagedetect {
87
+ useiso => true
88
+ source => 'test'
89
+ }
90
+ }
91
+ CONFIG
92
+ end
93
+
94
+ sample("test" => "The links between the attempted car bombings in Glasgow and London are becoming clearer") do
95
+ expect(subject).to include('lang')
96
+ expect(subject['lang']).to eq("en")
64
97
  end
65
98
  end
66
99
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-languagedetect
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1.pre.alpha.pre.12
4
+ version: 0.1.0.alpha.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Torsten Feld