xws 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (6) hide show
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data.tar.gz.sig +0 -0
  4. data/lib/xws.rb +43 -0
  5. metadata +88 -0
  6. metadata.gz.sig +1 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5810f7acaa63084378d114ea44d5707e0e7f4f9b
4
+ data.tar.gz: 99a35051dc284771d557c6c59b30dd3ae97809c0
5
+ SHA512:
6
+ metadata.gz: edffc30afb812e82fbbb11a641c2531081e432d3c1ebad20886b3ee5570a61b55c63746f7ee62d8593efdeca4ef36e94186277b5b2e9f43c1066a66beffec25c
7
+ data.tar.gz: 215a721bcdf607e48953a5173997aba18ef61562b74eac69d6b72157be8ad311579b225b678132f051d19870de645c85da72198347f2fe81330eeb24f7d88881
checksums.yaml.gz.sig ADDED
Binary file
data.tar.gz.sig ADDED
Binary file
data/lib/xws.rb ADDED
@@ -0,0 +1,43 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # file: xws.rb
4
+
5
+ require 'rexle'
6
+
7
+
8
# XML Word Scanner: walks a document node and counts the words found in the
# text of its elements.
class XWS

  # ignore_elements - element names (symbols or strings) whose own text is
  #                   excluded from the word count.
  def initialize(ignore_elements: %i(pre code time))

    @ignore_elements = ignore_elements
    @ignorewords = %i(the and or)
  end

  # Collects the text of every element yielded by node.each_recursive,
  # skipping elements whose name is listed in @ignore_elements, and returns
  # a Hash mapping each remaining word to the number of times it occurs.
  #
  # node - any object responding to #each_recursive that yields elements
  #        responding to #name and #texts (e.g. a Rexle element).
  def scan(node)

    # Compare whole names: a substring/regex test would also skip elements
    # such as "timeline", and would skip everything for an empty list.
    ignored = @ignore_elements.map(&:to_s)
    fragments = []

    node.each_recursive do |x|
      next if ignored.include?(x.name.to_s)

      fragments.concat(x.texts.map(&:strip).reject(&:empty?))
    end

    # Plain Hash (no default value), keys in first-occurrence order.
    words(fragments.join(' ')).split.each_with_object({}) do |word, counts|
      counts[word] = counts.fetch(word, 0) + 1
    end

  end

  # Normalises a string for word counting: lower-cases it, strips
  # punctuation and removes the words listed in @ignorewords.
  # Returns the cleaned String (may contain runs of spaces).
  def words(s)

    # Group the alternation so \b applies to every ignore word; without the
    # group, "the|and|or" would also match inside longer words.
    ignore_pattern = /\b(?:#{@ignorewords.join('|')})\b/

    s.downcase.
      gsub(/\w+'\w+/,'').     # remove words containing an apostrophe
      gsub(/["']/,'').        # remove quotation marks
      gsub(/(\w)[^a-z ]+\B|\B[^a-z #]+(\w)/,'\1\2'). # intended to strip non-alphabetic characters attached to the start or end of words
      gsub(/\s.\s/,' ').      # remove any single character standing alone between whitespace
      gsub(ignore_pattern,'') # remove ignore words (whole-word matches only)

  end

end
metadata ADDED
@@ -0,0 +1,88 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: xws
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - James Robertson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain:
11
+ - |
12
+ -----BEGIN CERTIFICATE-----
13
+ MIIDljCCAn6gAwIBAgIBATANBgkqhkiG9w0BAQUFADBIMRIwEAYDVQQDDAlnZW1t
14
+ YXN0ZXIxHjAcBgoJkiaJk/IsZAEZFg5qYW1lc3JvYmVydHNvbjESMBAGCgmSJomT
15
+ 8ixkARkWAmV1MB4XDTE1MTExMjEzNDIwOVoXDTE2MTExMTEzNDIwOVowSDESMBAG
16
+ A1UEAwwJZ2VtbWFzdGVyMR4wHAYKCZImiZPyLGQBGRYOamFtZXNyb2JlcnRzb24x
17
+ EjAQBgoJkiaJk/IsZAEZFgJldTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC
18
+ ggEBAMTc5Y2iGpVpgA0bMCMkawvWl45yZmC093FTo0ouHVxXqD6Spl6qd69TeJ47
19
+ b1luuUHWbWSD4kPgRrXCUSl3cBfSWHnrr7BBFo/s4KHLAeZeVu5oW4vyMe7LoeSq
20
+ 7m4OyHkXx1++i9p1StO3Xq4ATlJyDK88c4h0qwqLDtYbW7uo+MGDyTXu87JEE0PE
21
+ gE1zD6WNfgm/n5pbI8inijTD1Z8TUj+XTwDK88/q9qfUaZXVLFR6ZABwxU22wbNB
22
+ zvmIeQQTjHjf5WPoqEILIN64grKBKE2SkJfG0KcEEfK8MFVhvmzZRHJ06dDHaMjx
23
+ Gl6P8QmwkxCE21zlWv95M8W85tkCAwEAAaOBijCBhzAJBgNVHRMEAjAAMAsGA1Ud
24
+ DwQEAwIEsDAdBgNVHQ4EFgQUVXguJCp/zrEkJCkHCtFDEMWqv2owJgYDVR0RBB8w
25
+ HYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1h
26
+ c3RlckBqYW1lc3JvYmVydHNvbi5ldTANBgkqhkiG9w0BAQUFAAOCAQEAiEmEjzhh
27
+ zKdOtVNj+UPC4BgiAcx6Y5RVFlRhu4YHmbTNy69ufi9C96g+rY3i+x+Wai99mgPM
28
+ 0h9/iddDRokLOR4W26uZU187+MAxXHsMckIp5dMdSfLuqTSJe3yUt5iPutwSKHQu
29
+ pmWTTFlIxLlkcaAEdNmHdTW20qyxunhH5ejArJk8r33/odbQFfWPl6nUQbTqVAC7
30
+ dfHZ+bJE6nkLJVhFdKH+VCvr7bE7aa05//GN12RwurUpuSJ/ZA4EPL8InaykCUHh
31
+ gBElgRX/rNJ5r9tumb1VJ+TZMg8fu7CmCoC4FuJtnA7ajJHeC0JKSpCDYtgiOs4W
32
+ ET+JXG7GYubSoQ==
33
+ -----END CERTIFICATE-----
34
+ date: 2015-11-12 00:00:00.000000000 Z
35
+ dependencies:
36
+ - !ruby/object:Gem::Dependency
37
+ name: rexle
38
+ requirement: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - "~>"
41
+ - !ruby/object:Gem::Version
42
+ version: '1.3'
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: 1.3.9
46
+ type: :runtime
47
+ prerelease: false
48
+ version_requirements: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - "~>"
51
+ - !ruby/object:Gem::Version
52
+ version: '1.3'
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: 1.3.9
56
+ description:
57
+ email: james@r0bertson.co.uk
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - lib/xws.rb
63
+ homepage: https://github.com/jrobertson/xws
64
+ licenses:
65
+ - MIT
66
+ metadata: {}
67
+ post_install_message:
68
+ rdoc_options: []
69
+ require_paths:
70
+ - lib
71
+ required_ruby_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ required_rubygems_version: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - ">="
79
+ - !ruby/object:Gem::Version
80
+ version: '0'
81
+ requirements: []
82
+ rubyforge_project:
83
+ rubygems_version: 2.4.8
84
+ signing_key:
85
+ specification_version: 4
86
+ summary: XML Word Scanner (XWS) reads a Rexle document and returns a hash object containing
87
+ each word found.
88
+ test_files: []
metadata.gz.sig ADDED
@@ -0,0 +1 @@
1
+ 7@�9���}�Vꯐ]x�o��rBI�j3p�2-�t~`e<m�Yh*� 6g�����a\�&:ġa�g�Î)6���]�]nG���1�&�0� %��t���������#,U���Q0�H�.��ōmϼ72ˋǨÔ,����]�#��wm![