logstash-filter-languagedetect 0.0.1.pre.alpha.pre.12 → 0.1.0.alpha.15

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- MzdmZWM0OTlhZThlY2Q5MmQ2YmUyNzM1OTcwNWZkMzRkMzg2NTVjNQ==
4
+ NTgxOTYxMjU0ZTRhNzUyMDY0MzI0NjFhODFlNGE1YmI0OWJiM2NhMw==
5
5
  data.tar.gz: !binary |-
6
- OTc2NTRlM2Q5NjlkMTVkZTBiZGRkYmM2NWM4YWRkZjZkMWIzMmIwMg==
6
+ MWQ3ZmI3ZTBmNjg5YTEwYjZhOTRhMTU3ZTllN2Y1OGMxZTFlYzk0MA==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- ODQ5MGYzNTBkZmVmMmRlMjhkMzFjYmJlZWVjYTk4YmM0ZjUyMmEyOTk0NTk3
10
- ZjMyYzRlNWEyMWQ3OGViYjViMzUzYjk5NTliMWYzMzliYTljZTE1MjZmMzYw
11
- NDRhMGQyMDZmMGI2ZGQ2MzYxYzk1OTRiMTMwN2E4NTQzMjYxYjI=
9
+ NjI0NjBhODQyYzgyODYzZmJhZTMyZjc4NGE1YjQ2NDNhYTEyOWE2MTRiNmZl
10
+ NDZiMDhiZjRjZDJmNmZiNjM2YTU3ZDBhYTFiNGZlNzE3MDQ1OGZlMzE1NGJj
11
+ NzMwNzEwNGNhODZmNjI2NWNiNzdhMDhlMzZkZmQ1MWZiYzliZDE=
12
12
  data.tar.gz: !binary |-
13
- YzJhMzc1MDMyYjE5NDJjMDhkNjQ4ZDIzODgyMDYyNTg3OTc3MTcxYzUxNzkx
14
- MmYwNWZlOTE4Y2QyZjQ0NzgyZmU2ZjBmNTQ2ZGMyN2EzYTk5MWI1Zjc4NjQw
15
- MzBmNTNlYzQ5NDJjYTU1MGQ3NGFmZmFmZmViZDAzNTNmMDcwNDg=
13
+ MzZhZTU4MzM0ZTlhZGQxNWZkYjE5OWQ2ZDU3MTcwODgzM2EyNzkwZDFlZjA1
14
+ MGEzNjdiODVhZjk2ODc0OGQ4YTQyN2QyYTcyYzE5OWFkZDYyMDZhNWIzZGQ4
15
+ MDNiMWQ5MTRmNmJkNWVkZmVmNWVmYmI4MTNjNGRlMDcwYjlkZjQ=
data/Gemfile CHANGED
@@ -1,4 +1,3 @@
1
1
  source 'https://rubygems.org'
2
-
2
+ gemspec
3
3
  gem 'whatlanguage', :git => "https://github.com/peterc/whatlanguage.git", :ref => "0192301022"
4
- gemspec
@@ -1,31 +1,58 @@
1
- # encoding: utf-8
2
1
  require "logstash/filters/base"
3
2
  require "logstash/namespace"
4
3
  require 'whatlanguage'
5
- # require 'whatlanguage/string'
6
4
 
7
- # This example filter will replace the contents of the default
8
- # message field with whatever you specify in the configuration.
9
- #
10
- # It is only intended to be used as an example.
5
+ # This filter will try to determine the language of the field given
6
+ # by the source parameter and output the result in either the language
7
+ # name (in English lowercase) or iso format
11
8
  class LogStash::Filters::Languagedetect < LogStash::Filters::Base
12
9
 
13
10
  # Setting the config_name here is required. This is how you
14
11
  # configure this filter from your Logstash config.
15
12
  #
16
13
  # filter {
17
- # example {
18
- # message => "My message..."
14
+ # languagedetect {
15
+ # source => "message"
16
+ # target => "lang"
17
+ # useiso => true
19
18
  # }
20
19
  # }
21
20
  #
22
21
  config_name "languagedetect"
23
22
 
24
- # Replace the message with this value.
23
+ # Set the source field which is used for the language check.
24
+ #
25
+ # Example:
26
+ # [source,ruby]
27
+ # filter {
28
+ # languagedetect {
29
+ # # tries to determine the language in "customfield"
30
+ # source => "customfield"
31
+ # }
32
+ # }
25
33
  config :source, :validate => :string, :default => "message"
26
34
 
35
+ # Set the result field in which the result will be stored.
36
+ #
37
+ # Example:
38
+ # [source,ruby]
39
+ # filter {
40
+ # languagedetect {
41
+ # # outputs the result into "customoutput"
42
+ # target => "customoutput"
43
+ # }
44
+ # }
27
45
  config :target, :validate => :string, :default => "lang"
28
46
 
47
+ # If useiso is true, result will be "en" instead of "english"
48
+ #
49
+ # Example:
50
+ # [source,ruby]
51
+ # filter {
52
+ # languagedetect {
53
+ # useiso => true
54
+ # }
55
+ # }
29
56
  config :useiso, :validate => :boolean, :default => false
30
57
 
31
58
 
@@ -40,13 +67,11 @@ class LogStash::Filters::Languagedetect < LogStash::Filters::Base
40
67
  text_to_detect = event[@source]
41
68
  if @useiso
42
69
  lang_found = @wl.language_iso(text_to_detect)
43
- # lang_found = text_to_detect.language_iso
44
70
  else
45
71
  lang_found = @wl.language(text_to_detect)
46
- # lang_found = text_to_detect.language
47
72
  end
48
73
 
49
- event[@target] = lang_found
74
+ event[@target] = lang_found.to_s.force_encoding(Encoding::UTF_8)
50
75
 
51
76
  # filter_matched should go in the last line of our successful code
52
77
  filter_matched(event)
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-filter-languagedetect'
3
- s.version = '0.0.1'
4
- s.version = "#{s.version}-alpha-#{ENV['TRAVIS_BUILD_NUMBER']}" if ENV['TRAVIS']
3
+ s.version = '0.1.0'
4
+ s.version = "#{s.version}.alpha.#{ENV['TRAVIS_BUILD_NUMBER']}" if ENV['TRAVIS']
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "This example filter replaces the contents of the message field with the specified value."
7
7
  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
@@ -1,7 +1,6 @@
1
1
  # encoding: utf-8
2
2
  require File.absolute_path(File.join(File.dirname(__FILE__), '../../spec/spec_helper'))
3
3
  require File.absolute_path(File.join(File.dirname(__FILE__), '../../lib/logstash/filters/languagedetect'))
4
- # require "lib/logstash/filters/languagedetect"
5
4
 
6
5
  describe LogStash::Filters::Languagedetect do
7
6
  describe "with defaults and non iso languages" do
@@ -14,22 +13,22 @@ describe LogStash::Filters::Languagedetect do
14
13
 
15
14
  sample("message" => "The links between the attempted car bombings in Glasgow and London are becoming clearer") do
16
15
  expect(subject).to include('lang')
17
- expect(subject['lang']).to eq(:english)
16
+ expect(subject['lang']).to eq("english")
18
17
  end
19
18
 
20
19
  sample("message" => "dies ist ein deutscher Text") do
21
20
  expect(subject).to include('lang')
22
- expect(subject['lang']).to eq(:german)
21
+ expect(subject['lang']).to eq("german")
23
22
  end
24
23
 
25
24
  sample("message" => "Deux autres personnes ont été arrêtées durant la nuit") do
26
25
  expect(subject).to include('lang')
27
- expect(subject['lang']).to eq(:french)
26
+ expect(subject['lang']).to eq("french")
28
27
  end
29
28
 
30
29
  sample("message" => "En estado de máxima alertaen su nivel de crítico") do
31
30
  expect(subject).to include('lang')
32
- expect(subject['lang']).to eq(:spanish)
31
+ expect(subject['lang']).to eq("spanish")
33
32
  end
34
33
  end
35
34
 
@@ -45,22 +44,56 @@ describe LogStash::Filters::Languagedetect do
45
44
 
46
45
  sample("message" => "The links between the attempted car bombings in Glasgow and London are becoming clearer") do
47
46
  expect(subject).to include('lang')
48
- expect(subject['lang']).to eq(:en)
47
+ expect(subject['lang']).to eq("en")
49
48
  end
50
49
 
51
50
  sample("message" => "dies ist ein deutscher Text") do
52
51
  expect(subject).to include('lang')
53
- expect(subject['lang']).to eq(:de)
52
+ expect(subject['lang']).to eq("de")
54
53
  end
55
54
 
56
55
  sample("message" => "Deux autres personnes ont été arrêtées durant la nuit") do
57
56
  expect(subject).to include('lang')
58
- expect(subject['lang']).to eq(:fr)
57
+ expect(subject['lang']).to eq("fr")
59
58
  end
60
59
 
61
60
  sample("message" => "En estado de máxima alertaen su nivel de crítico") do
62
61
  expect(subject).to include('lang')
63
- expect(subject['lang']).to eq(:es)
62
+ expect(subject['lang']).to eq("es")
63
+ end
64
+ end
65
+
66
+ describe "non default target" do
67
+ let(:config) do <<-CONFIG
68
+ filter {
69
+ languagedetect {
70
+ useiso => true
71
+ target => 'test'
72
+ }
73
+ }
74
+ CONFIG
75
+ end
76
+
77
+ sample("message" => "The links between the attempted car bombings in Glasgow and London are becoming clearer") do
78
+ expect(subject).to include('test')
79
+ expect(subject['test']).to eq("en")
80
+ end
81
+ end
82
+
83
+ describe "non default source" do
84
+ let(:config) do <<-CONFIG
85
+ filter {
86
+ languagedetect {
87
+ useiso => true
88
+ source => 'test'
89
+ }
90
+ }
91
+ CONFIG
92
+ end
93
+
94
+ sample("test" => "The links between the attempted car bombings in Glasgow and London are becoming clearer") do
95
+ expect(subject).to include('lang')
96
+ expect(subject['lang']).to eq("en")
64
97
  end
65
98
  end
66
99
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-languagedetect
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1.pre.alpha.pre.12
4
+ version: 0.1.0.alpha.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Torsten Feld