rcs-common 9.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. checksums.yaml +7 -0
  2. data/.gitignore +49 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE.txt +1 -0
  5. data/Rakefile +27 -0
  6. data/lib/rcs-common.rb +21 -0
  7. data/lib/rcs-common/binary.rb +64 -0
  8. data/lib/rcs-common/cgi.rb +7 -0
  9. data/lib/rcs-common/component.rb +87 -0
  10. data/lib/rcs-common/crypt.rb +71 -0
  11. data/lib/rcs-common/deploy.rb +96 -0
  12. data/lib/rcs-common/diagnosticable.rb +136 -0
  13. data/lib/rcs-common/evidence.rb +261 -0
  14. data/lib/rcs-common/evidence/addressbook.rb +173 -0
  15. data/lib/rcs-common/evidence/application.rb +59 -0
  16. data/lib/rcs-common/evidence/calendar.rb +62 -0
  17. data/lib/rcs-common/evidence/call.rb +185 -0
  18. data/lib/rcs-common/evidence/camera.rb +25 -0
  19. data/lib/rcs-common/evidence/chat.rb +272 -0
  20. data/lib/rcs-common/evidence/clibpoard.rb +58 -0
  21. data/lib/rcs-common/evidence/command.rb +50 -0
  22. data/lib/rcs-common/evidence/common.rb +78 -0
  23. data/lib/rcs-common/evidence/content/camera/001.jpg +0 -0
  24. data/lib/rcs-common/evidence/content/coin/wallet_bit.dat +0 -0
  25. data/lib/rcs-common/evidence/content/coin/wallet_lite.dat +0 -0
  26. data/lib/rcs-common/evidence/content/file/Einstein.docx +0 -0
  27. data/lib/rcs-common/evidence/content/file/arabic.docx +0 -0
  28. data/lib/rcs-common/evidence/content/mouse/001.jpg +0 -0
  29. data/lib/rcs-common/evidence/content/mouse/002.jpg +0 -0
  30. data/lib/rcs-common/evidence/content/mouse/003.jpg +0 -0
  31. data/lib/rcs-common/evidence/content/mouse/004.jpg +0 -0
  32. data/lib/rcs-common/evidence/content/print/001.jpg +0 -0
  33. data/lib/rcs-common/evidence/content/screenshot/001.jpg +0 -0
  34. data/lib/rcs-common/evidence/content/screenshot/002.jpg +0 -0
  35. data/lib/rcs-common/evidence/content/screenshot/003.jpg +0 -0
  36. data/lib/rcs-common/evidence/content/url/001.jpg +0 -0
  37. data/lib/rcs-common/evidence/content/url/002.jpg +0 -0
  38. data/lib/rcs-common/evidence/content/url/003.jpg +0 -0
  39. data/lib/rcs-common/evidence/device.rb +23 -0
  40. data/lib/rcs-common/evidence/download.rb +54 -0
  41. data/lib/rcs-common/evidence/exec.rb +0 -0
  42. data/lib/rcs-common/evidence/file.rb +129 -0
  43. data/lib/rcs-common/evidence/filesystem.rb +71 -0
  44. data/lib/rcs-common/evidence/info.rb +24 -0
  45. data/lib/rcs-common/evidence/keylog.rb +84 -0
  46. data/lib/rcs-common/evidence/mail.rb +237 -0
  47. data/lib/rcs-common/evidence/mic.rb +39 -0
  48. data/lib/rcs-common/evidence/mms.rb +36 -0
  49. data/lib/rcs-common/evidence/money.rb +676 -0
  50. data/lib/rcs-common/evidence/mouse.rb +62 -0
  51. data/lib/rcs-common/evidence/password.rb +60 -0
  52. data/lib/rcs-common/evidence/photo.rb +80 -0
  53. data/lib/rcs-common/evidence/position.rb +303 -0
  54. data/lib/rcs-common/evidence/print.rb +50 -0
  55. data/lib/rcs-common/evidence/screenshot.rb +53 -0
  56. data/lib/rcs-common/evidence/sms.rb +91 -0
  57. data/lib/rcs-common/evidence/url.rb +133 -0
  58. data/lib/rcs-common/fixnum.rb +48 -0
  59. data/lib/rcs-common/gridfs.rb +294 -0
  60. data/lib/rcs-common/heartbeat.rb +96 -0
  61. data/lib/rcs-common/keywords.rb +50 -0
  62. data/lib/rcs-common/mime.rb +65 -0
  63. data/lib/rcs-common/mongoid.rb +19 -0
  64. data/lib/rcs-common/pascalize.rb +62 -0
  65. data/lib/rcs-common/path_utils.rb +67 -0
  66. data/lib/rcs-common/resolver.rb +40 -0
  67. data/lib/rcs-common/rest.rb +17 -0
  68. data/lib/rcs-common/sanitize.rb +42 -0
  69. data/lib/rcs-common/serializer.rb +404 -0
  70. data/lib/rcs-common/signature.rb +141 -0
  71. data/lib/rcs-common/stats.rb +94 -0
  72. data/lib/rcs-common/symbolize.rb +10 -0
  73. data/lib/rcs-common/systemstatus.rb +136 -0
  74. data/lib/rcs-common/temporary.rb +13 -0
  75. data/lib/rcs-common/time.rb +24 -0
  76. data/lib/rcs-common/trace.rb +138 -0
  77. data/lib/rcs-common/trace.yaml +42 -0
  78. data/lib/rcs-common/updater/client.rb +354 -0
  79. data/lib/rcs-common/updater/dsl.rb +178 -0
  80. data/lib/rcs-common/updater/payload.rb +79 -0
  81. data/lib/rcs-common/updater/server.rb +126 -0
  82. data/lib/rcs-common/updater/shared_key.rb +55 -0
  83. data/lib/rcs-common/updater/tmp_dir.rb +13 -0
  84. data/lib/rcs-common/utf16le.rb +83 -0
  85. data/lib/rcs-common/version.rb +5 -0
  86. data/lib/rcs-common/winfirewall.rb +235 -0
  87. data/rcs-common.gemspec +64 -0
  88. data/spec/gridfs_spec.rb +637 -0
  89. data/spec/mongoid.yaml +6 -0
  90. data/spec/signature_spec.rb +105 -0
  91. data/spec/spec_helper.rb +22 -0
  92. data/spec/updater_spec.rb +80 -0
  93. data/tasks/deploy.rake +21 -0
  94. data/tasks/protect.rake +90 -0
  95. data/test/helper.rb +17 -0
  96. data/test/test_binary.rb +107 -0
  97. data/test/test_cgi.rb +14 -0
  98. data/test/test_crypt.rb +125 -0
  99. data/test/test_evidence.rb +52 -0
  100. data/test/test_evidence_manager.rb +119 -0
  101. data/test/test_fixnum.rb +35 -0
  102. data/test/test_keywords.rb +137 -0
  103. data/test/test_mime.rb +49 -0
  104. data/test/test_pascalize.rb +100 -0
  105. data/test/test_path_utils.rb +24 -0
  106. data/test/test_rcs-common.rb +7 -0
  107. data/test/test_sanitize.rb +40 -0
  108. data/test/test_serialization.rb +20 -0
  109. data/test/test_stats.rb +90 -0
  110. data/test/test_symbolize.rb +20 -0
  111. data/test/test_systemstatus.rb +35 -0
  112. data/test/test_time.rb +56 -0
  113. data/test/test_trace.rb +25 -0
  114. data/test/test_utf16le.rb +71 -0
  115. data/test/test_winfirewall.rb +68 -0
  116. metadata +423 -0
@@ -0,0 +1,14 @@
1
+ require 'helper'
2
+
3
+ class URITest < Test::Unit::TestCase
4
+
5
+ def test_hash_to_uri_query
6
+ h = {'q' => 'pippo', 'filter' => 'pluto', :symbol => 'paperino'}
7
+ assert_equal "q=pippo&filter=pluto&symbol=paperino", CGI.encode_query(h)
8
+ end
9
+
10
+ def test_hash_with_special_chars_to_uri_query
11
+ h = {'q' => 'pippo pluto paperino'}
12
+ assert_equal "q=pippo+pluto+paperino", CGI.encode_query(h)
13
+ end
14
+ end
@@ -0,0 +1,125 @@
1
+ require 'helper'
2
+ require 'securerandom'
3
+
4
+ class TestRcsCommon < Test::Unit::TestCase
5
+
6
+ include RCS::Crypt
7
+
8
+ def test_crypt_multiple_of_block_size
9
+
10
+ clear = SecureRandom.random_bytes(16)
11
+ key = Digest::MD5.digest "4yeN5zu0+il3Jtcb5a1sBcAdjYFcsD9z"
12
+
13
+ # padding is ON by default
14
+ enc = aes_encrypt(clear, key)
15
+
16
+ # must be a multiple of block_len
17
+ assert_equal true, enc.length % 16 == 0
18
+
19
+ dec = aes_decrypt(enc, key)
20
+
21
+ assert_equal clear, dec
22
+ end
23
+
24
+ def test_crypt_not_multiple_of_block_size
25
+
26
+ clear = SecureRandom.random_bytes(19)
27
+ key = Digest::MD5.digest "4yeN5zu0+il3Jtcb5a1sBcAdjYFcsD9z"
28
+
29
+ # padding is ON by default
30
+ enc = aes_encrypt(clear, key)
31
+
32
+ # must be a multiple of block_len
33
+ assert_equal true, enc.length % 16 == 0
34
+
35
+ dec = aes_decrypt(enc, key)
36
+
37
+ assert_equal clear, dec
38
+ end
39
+
40
+ def test_crypt_multiple_of_block_size_no_pad
41
+
42
+ clear = SecureRandom.random_bytes(16)
43
+ key = Digest::MD5.digest "4yeN5zu0+il3Jtcb5a1sBcAdjYFcsD9z"
44
+
45
+ # padding is ON by default, disable it
46
+ enc = aes_encrypt(clear, key, PAD_NOPAD)
47
+
48
+ # must be exactly the same size
49
+ assert_equal true, enc.length == clear.length
50
+
51
+ dec = aes_decrypt(enc, key, PAD_NOPAD)
52
+
53
+ assert_equal clear, dec
54
+ end
55
+
56
+ def test_crypt_not_multiple_of_block_size_no_pad
57
+
58
+ clear = SecureRandom.random_bytes(19)
59
+ key = Digest::MD5.digest "4yeN5zu0+il3Jtcb5a1sBcAdjYFcsD9z"
60
+
61
+ # padding is ON by default, disable it
62
+ assert_raise(OpenSSL::Cipher::CipherError) do
63
+ aes_encrypt(clear, key, PAD_NOPAD)
64
+ end
65
+ end
66
+
67
+ def test_crypt_wrong_padding
68
+
69
+ clear = SecureRandom.random_bytes(16)
70
+ key = Digest::MD5.digest "4yeN5zu0+il3Jtcb5a1sBcAdjYFcsD9z"
71
+
72
+ # padding is ON by default, disable it
73
+ enc = aes_encrypt(clear, key, PAD_NOPAD)
74
+
75
+ # must be exactly the same size
76
+ assert_equal true, enc.length == clear.length
77
+
78
+ assert_raise(OpenSSL::Cipher::CipherError) do
79
+ aes_decrypt(enc, key)
80
+ end
81
+ end
82
+
83
+ def test_crypt_integrity
84
+ clear = SecureRandom.random_bytes(21)
85
+ key = Digest::MD5.digest "secret"
86
+
87
+ enc = aes_encrypt_integrity(clear, key)
88
+ dec = aes_decrypt(enc, key)
89
+
90
+ # extract the sha1 integrity check
91
+ check = dec.slice!(dec.length - Digest::SHA1.new.digest_length, dec.length)
92
+
93
+ assert_equal clear, dec
94
+ assert_equal check, Digest::SHA1.digest(dec)
95
+ end
96
+
97
+ def test_decrypt_integrity
98
+ clear = SecureRandom.random_bytes(21)
99
+ key = Digest::MD5.digest "secret"
100
+
101
+ clear_check = clear + Digest::SHA1.digest(clear)
102
+
103
+ enc = aes_encrypt(clear_check, key)
104
+ dec = ""
105
+ assert_nothing_raised do
106
+ dec = aes_decrypt_integrity(enc, key)
107
+ end
108
+ assert_equal clear, dec
109
+ end
110
+
111
+ def test_decrypt_integrity_fail
112
+ clear = SecureRandom.random_bytes(21)
113
+ key = Digest::MD5.digest "secret"
114
+
115
+ # fake sha1 check
116
+ clear_check = clear + SecureRandom.random_bytes(20)
117
+ enc = aes_encrypt(clear_check, key)
118
+
119
+ # this will fail to validate the sha1
120
+ assert_raise RuntimeError do
121
+ aes_decrypt_integrity(enc, key)
122
+ end
123
+ end
124
+
125
+ end
@@ -0,0 +1,52 @@
1
+ require "helper"
2
+ require "rcs-common/evidence"
3
+ module RCS
4
+
5
+ # TODO: implement more test cases for Evidence class
6
+ class TestEvidence < Test::Unit::TestCase
7
+
8
+ # Called before every test method runs. Can be used
9
+ # to set up fixture information.
10
+ def setup
11
+ @key = ["000102030405060708090a0b0c0d0e0f"].pack('H*')
12
+ @info = { :device_id => "test-device", :user_id => "test-user", :source_id => "127.0.0.1" }
13
+ end
14
+
15
+ # Called after every test method runs. Can be used to tear
16
+ # down fixture information.
17
+
18
+ def teardown
19
+ # Do nothing
20
+ end
21
+
22
+ # TODO: split this test; it currently exercises both generation and deserialization of evidence at once
23
+ def test_generate
24
+ piece = RCS::Evidence.new(@key).generate(:DEVICE, @info)
25
+ evidences, action = RCS::Evidence.new(@key).deserialize(piece.binary)
26
+
27
+ assert_equal piece.content.force_encoding('UTF-16LE').encode('UTF-8'), evidences[0][:data][:content]
28
+ end
29
+
30
+ def test_align_to_block_len
31
+ evidence = RCS::Evidence.new(@key)
32
+ assert_equal(0, evidence.align_to_block_len(0))
33
+ assert_equal(16, evidence.align_to_block_len(15))
34
+ assert_equal(16, evidence.align_to_block_len(16))
35
+ assert_equal(32, evidence.align_to_block_len(17))
36
+ end
37
+
38
+ def test_encrypt
39
+ evidence = RCS::Evidence.new(@key)
40
+ test_string = ['00112233445566778899aabbccddeeff'].pack('H*')
41
+ assert_equal('69c4e0d86a7b0430d8cdb78070b4c55a', evidence.encrypt(test_string).unpack('H*').shift)
42
+ end
43
+
44
+ def test_decrypt
45
+ evidence = RCS::Evidence.new(@key)
46
+ test_string = ['69c4e0d86a7b0430d8cdb78070b4c55a'].pack('H*')
47
+ assert_equal('00112233445566778899aabbccddeeff', evidence.decrypt(test_string).unpack('H*').shift)
48
+ end
49
+
50
+ end
51
+
52
+ end # RCS::
@@ -0,0 +1,119 @@
1
+ require 'helper'
2
+
3
+ =begin
4
+ module RCS
5
+
6
+ # dirty hack to fake the trace function
7
+ # re-open the class and override the method
8
+ class EvidenceManager
9
+ def trace(a, b)
10
+ end
11
+ end
12
+
13
+ class TestEvidenceManager < Test::Unit::TestCase
14
+
15
+ def setup
16
+ @instance = "test-instance"
17
+ EvidenceManager.instance.create_repository @instance
18
+ assert_true File.exist?(EvidenceManager::REPO_DIR + '/' + @instance)
19
+ @session = {:bid => '141178',
20
+ :build => 'test-build',
21
+ :instance => @instance,
22
+ :subtype => 'test-subtype'}
23
+
24
+ @ident = [2011010101, 'test-user', 'test-device', 'test-source']
25
+ @now = Time.now.getutc.to_i
26
+ end
27
+
28
+ def teardown
29
+ File.delete(EvidenceManager::REPO_DIR + '/' + @instance) if File.exist?(EvidenceManager::REPO_DIR + '/' + @instance)
30
+ Dir.delete(EvidenceManager::REPO_DIR) if File.directory?(EvidenceManager::REPO_DIR)
31
+ end
32
+
33
+ def test_sync_start
34
+ EvidenceManager.instance.sync_start @session, *@ident, @now
35
+
36
+ info = EvidenceManager.instance.instance_info @session[:instance]
37
+
38
+ assert_equal @session[:bid], info['bid']
39
+ assert_equal @session[:build], info['build']
40
+ assert_equal @session[:instance], info['instance']
41
+ assert_equal @session[:subtype], info['subtype']
42
+ assert_equal @ident[0], info['version']
43
+ assert_equal @ident[1], info['user']
44
+ assert_equal @ident[2], info['device']
45
+ assert_equal @ident[3], info['source']
46
+ assert_equal @now.to_i, info['sync_time']
47
+ assert_equal EvidenceManager::SYNC_IN_PROGRESS, info['sync_status']
48
+ end
49
+
50
+ def test_sync_timeout_after_start
51
+ EvidenceManager.instance.sync_start @session, *@ident, @now
52
+ EvidenceManager.instance.sync_timeout @session
53
+ info = EvidenceManager.instance.instance_info @session[:instance]
54
+ assert_equal EvidenceManager::SYNC_TIMEOUTED, info['sync_status']
55
+ end
56
+
57
+ def test_sync_timeout_after_end
58
+ EvidenceManager.instance.sync_start @session, *@ident, @now
59
+ EvidenceManager.instance.sync_end @session
60
+ EvidenceManager.instance.sync_timeout @session
61
+ info = EvidenceManager.instance.instance_info @session[:instance]
62
+ assert_equal EvidenceManager::SYNC_IDLE, info['sync_status']
63
+ end
64
+
65
+ def test_sync_timeout_all
66
+ EvidenceManager.instance.sync_start @session, *@ident, @now
67
+ EvidenceManager.instance.sync_timeout_all
68
+ info = EvidenceManager.instance.instance_info @session[:instance]
69
+ assert_equal EvidenceManager::SYNC_TIMEOUTED, info['sync_status']
70
+ end
71
+
72
+ def test_sync_timeout_all_idle
73
+ EvidenceManager.instance.sync_start @session, *@ident, @now
74
+ EvidenceManager.instance.sync_end @session
75
+ EvidenceManager.instance.sync_timeout_all
76
+ info = EvidenceManager.instance.instance_info @session[:instance]
77
+ assert_equal EvidenceManager::SYNC_IDLE, info['sync_status']
78
+ end
79
+
80
+ def test_sync_end
81
+ EvidenceManager.instance.sync_start @session, *@ident, @now
82
+ EvidenceManager.instance.sync_end @session
83
+ info = EvidenceManager.instance.instance_info @session[:instance]
84
+ assert_equal EvidenceManager::SYNC_IDLE, info['sync_status']
85
+ end
86
+
87
+ def test_sync_not_existent
88
+ File.delete(EvidenceManager::REPO_DIR + '/' + @instance)
89
+ EvidenceManager.instance.sync_end @session
90
+ info = EvidenceManager.instance.instance_info @session[:instance]
91
+ assert_nil info
92
+ end
93
+
94
+ def test_sync_start_start
95
+ EvidenceManager.instance.sync_start @session, *@ident, @now
96
+ EvidenceManager.instance.sync_start @session, *@ident, @now
97
+ info = EvidenceManager.instance.instance_info @session[:instance]
98
+ assert_equal EvidenceManager::SYNC_IN_PROGRESS, info['sync_status']
99
+ end
100
+
101
+ def test_evidence
102
+ evidence = "test-evidence"
103
+ EvidenceManager.instance.sync_start @session, *@ident, @now
104
+ # insert two fake evidences
105
+ id1 = EvidenceManager.instance.store_evidence @session, evidence.length, evidence
106
+ id2 = EvidenceManager.instance.store_evidence @session, evidence.length, evidence
107
+ info = EvidenceManager.instance.evidence_info @session[:instance]
108
+ assert_equal evidence.length, info[0].first
109
+ assert_equal evidence.length, info[1].first
110
+ assert_equal 2, info.length
111
+ assert_true id1 > 0
112
+ assert_true id2 > 0
113
+ assert_true id2 > id1
114
+ end
115
+
116
+ end
117
+
118
+ end #RCS::
119
+ =end
@@ -0,0 +1,35 @@
1
+ require "helper"
2
+
3
+ class FixnumTest < Test::Unit::TestCase
4
+
5
+ def test_byte
6
+ assert_equal "1023 B", 1023.to_s_bytes
7
+ assert_equal "999 B", 999.to_s_bytes(10)
8
+ end
9
+
10
+ def test_kilo
11
+ assert_equal "1.0 KiB", (2**10).to_s_bytes
12
+ assert_equal "1.02 kB", (2**10).to_s_bytes(10)
13
+ assert_equal "1.0 kB", (10**3).to_s_bytes(10)
14
+ end
15
+
16
+ def test_mega
17
+ assert_equal "1.0 MiB", (2**20).to_s_bytes
18
+ assert_equal "1.05 MB", (2**20).to_s_bytes(10)
19
+ assert_equal "1.0 MB", (10**6).to_s_bytes(10)
20
+ end
21
+
22
+ def test_giga
23
+ assert_equal "1.0 GiB", (2**30).to_s_bytes
24
+ assert_equal "1.07 GB", (2**30).to_s_bytes(10)
25
+ assert_equal "1.0 GB", (10**9).to_s_bytes(10)
26
+ end
27
+
28
+ def test_tera
29
+ assert_equal "1.0 TiB", (2**40).to_s_bytes
30
+ assert_equal "1.1 TB", (2**40).to_s_bytes(10)
31
+ assert_equal "1.0 TB", (10**12).to_s_bytes(10)
32
+ end
33
+
34
+
35
+ end
@@ -0,0 +1,137 @@
1
+ # encoding: utf-8
2
+
3
+ require 'helper'
4
+ require 'securerandom'
5
+
6
+ class KeywordsTest < Test::Unit::TestCase
7
+
8
+ def test_dont_modify_source
9
+ input = "abc 123 : 456 + @ pippo"
10
+ source = input.dup
11
+ output = ['123', '456', 'abc', 'pippo']
12
+ assert_equal output, input.keywords
13
+ assert_equal source, input
14
+ end
15
+
16
+ def test_strip
17
+ input = " ciao\n\t miao"
18
+ output = ['ciao', 'miao']
19
+ assert_equal output, input.keywords
20
+ end
21
+
22
+ def test_mixed_case
23
+ input = " CIAO miao ciao"
24
+ output = ['ciao', 'miao']
25
+ assert_equal output, input.keywords
26
+ end
27
+
28
+ def test_punctuation
29
+ input = "ciao, miao. bau; ,pippo !pluto"
30
+ output = ['bau', 'ciao', 'miao', 'pippo', 'pluto']
31
+ assert_equal output, input.keywords
32
+ end
33
+
34
+ def test_email
35
+ input = "a.ornaghi@hackingteam.it mail di alberto"
36
+ output = ["a", "alberto", "di", "hackingteam", "it", "mail", "ornaghi"]
37
+ assert_equal output, input.keywords
38
+ end
39
+
40
+ def test_duplicates
41
+ input = "il mattino ha l'oro in bocca, il mattino ha l'oro in bocca"
42
+ output = ["bocca", "ha", "il", "in", "l", "mattino", "oro"]
43
+ assert_equal output, input.keywords
44
+ end
45
+
46
+ def test_numbers
47
+ input = "123 456 789"
48
+ output = ['123', '456', '789']
49
+ assert_equal output, input.keywords
50
+ end
51
+
52
+ def test_telephone_number
53
+ input = "Il mio numero di telefono: +393480115642"
54
+ output = ['393480115642', 'di', 'il', 'mio', 'numero', 'telefono']
55
+ assert_equal output, input.keywords
56
+ end
57
+
58
+ def test_file_path
59
+ input = "c:\\users\\alor\\documents\\secret\\plan.doc"
60
+ output = ['alor', 'c', 'doc', 'documents', 'plan', 'secret', 'users']
61
+ assert_equal output, input.keywords
62
+ end
63
+
64
+ def test_marks
65
+ input = "do you know me? of course!"
66
+ output = ["course", "do", "know", "me", "of", "you"]
67
+ assert_equal output, input.keywords
68
+ end
69
+
70
+ def test_utf8
71
+ input = "スパイ alor, スパイ... "
72
+ output = ['alor', 'スパイ']
73
+ assert_equal output, input.keywords
74
+ end
75
+
76
+ def test_more_utf8
77
+ input = "Il sØl, è bello!"
78
+ output = ['bello', 'il', 'sØl', 'è']
79
+ assert_equal output, input.keywords
80
+ end
81
+
82
+ def test_symbols
83
+ input = "alor, ... + 10% +3946 / * alor@ht-ht {sid-55--55} [ht] (55%) A&F ht_ht <html>"
84
+ output = ["10", "3946", "55", "a", "alor", "f", "ht", "html", "sid"]
85
+ assert_equal output, input.keywords
86
+ end
87
+
88
+ def test_utf8_symbols
89
+ input = "ス alor, ... + 10% +3946 / * alor@ht-ht {sid-55--55} [ht] (55%) A&F ht_ht <html>"
90
+ output = ["10", "3946", "55", "a", "alor", "f", "ht", "html", "sid", "ス"]
91
+ assert_equal output, input.keywords
92
+ end
93
+
94
+ def test_tweet
95
+ input = "this is cool :) #coolesthing"
96
+ output = ["cool", "coolesthing", "is", "this"]
97
+ assert_equal output, input.keywords
98
+ end
99
+
100
+ def test_ascii
101
+ input = "abc def".force_encoding("ASCII-8BIT")
102
+ output = ["abc", "def"]
103
+ assert_equal output, input.keywords
104
+ end
105
+
106
+ def test_binary
107
+ input = "keep " + SecureRandom.random_bytes(16)
108
+ output = ['keep']
109
+ assert_equal output, output & input.keywords
110
+ end
111
+
112
+ def test_binary_special
113
+ input = "\x6b\x65\x65\x70\x20\xf1\xb5\xfb\xc0\x55\x22\x23\xee\x25\xca\xd9\xde\x02\xef\x0d\xf1"
114
+ output = ['keep']
115
+ assert_equal output, output & input.keywords
116
+ end
117
+
118
+
119
+ def test_avoid_word_too_long
120
+ input = "how do we handle encoded binary like this dGhpcyBpcyBhIHdvcmQgdG9vIGxvbmcK?"
121
+ output = ["binary", "do", "encoded", "handle", "how", "like", "this", "we"]
122
+ assert_equal output, input.keywords
123
+ end
124
+
125
+ def test_invalid_chars
126
+ input = "Menu \x95 U .'7\x95--;,"
127
+ output = ['7', 'menu', 'u']
128
+ assert_equal output, input.keywords
129
+ end
130
+
131
+ def test_real_ocr
132
+ input = "Menu \x95 U .'7\x95--;, '.,1 ID 10:35\ni 4 lli 1 - 1/.\nEll rat\nIP' ,\nContacts Messaging Web\nGo\nGallery Calendar Mail\nStore Vi eos Music p ayer\n\\ li\nSearch Maps Settings\nCt \x97\n\x97"
133
+ output = ["1","10","35","4","7","ayer","calendar","contacts","ct","ell","eos","gallery","go","i","id","ip","li","lli","mail","maps","menu","messaging","music","p","rat","search","settings","store","u","vi","web"]
134
+ assert_equal output, input.keywords
135
+ end
136
+
137
+ end