dxtitle_search 0.1.1 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: df32efa0600562a68efdde1eb315b79444c5b33b1ba32a4784976819ac62c241
4
- data.tar.gz: 683deed83c63b4e755ff02b084e3b3e7d436248eb1a6db62fe0bb2e511024e08
3
+ metadata.gz: 2ddaa5dc16334b0c89dfca449e875a99e35ff110eab534aa2b0b3b8ff9299dd4
4
+ data.tar.gz: f236adf29de45746e766880c6da22f78b3829d192f91e3cf8a8ad25a1fe39452
5
5
  SHA512:
6
- metadata.gz: 3533f42a47a75ae9f5dee05100e8a8b1594e9ac9f2dd5aeb926071a339fc7cbad82c0faa32e724924dd4e16e3c6d4465864e6c3560f042fa25bbb937d3ca80f9
7
- data.tar.gz: fd0423876c4ded5577f0e8ced346a61520ac9df7f1d18b88dadfa3ef59f057a7d2a6ece5c9de67f5ef7b25a73e32edc479a45498a38ac7e742bbb155d7dc55de
6
+ metadata.gz: e19af732c9c6ff0c0a389c8b03ff4d1a0a5b932c047a74060c7853b1e52f135dba563b9195c71b1a1fd78313a988ab18dc88078db625a01b1f3e0ee9788eb025
7
+ data.tar.gz: c34c280d1dcd920e613f49af75c0a45f2f1a900a1baae438033c6fd50422e11c8bbe1756040e1cdad2204c62f2625e06551943a7d6243563d5c55edadb0859e0
checksums.yaml.gz.sig CHANGED
Binary file
data.tar.gz.sig CHANGED
Binary file
@@ -2,69 +2,58 @@
2
2
 
3
3
  # file: dxtitle_search.rb
4
4
 
5
- require 'dynarex'
5
+ require 'indexer101'
6
6
 
7
7
 
8
8
  class DxTitleSearch
9
9
 
10
- def initialize(obj, debug: false)
10
+ def initialize(obj=nil, sources: nil, level: 1, debug: false)
11
11
 
12
12
  @debug = debug
13
-
14
- # is it a Dynarex file location?
15
- s = if obj.lines.length < 2 then
16
-
17
- xml = RXFHelper.read(obj).first
18
-
19
- dx = Dynarex.new(xml)
20
-
21
- dx.all.map do |x|
22
- "%d %s %s" % [Time.parse(x.created).to_i, x.title, x.url]
23
- end.join("\n")
24
-
25
- else
26
- obj
27
- end
28
-
29
- @h = h = s.lines.inject({}) do |r,x|
30
- key, value = x.split(/\s+(?=[^\s]+$)/,2)
31
- r.merge(key.rstrip => value)
13
+ @indexer = Indexer101.new debug: debug
14
+
15
+ s = if sources then
16
+
17
+ dx = Dynarex.new(sources)
18
+ puts 'before scan_dxindex' if @debug
19
+ a = dx.all.map(&:uri)
20
+ puts 'a: ' + a.inspect if @debug
21
+ @indexer.scan_dxindex a, level: level
22
+
23
+ elsif obj and obj.lines.length < 2
24
+
25
+ @indexer.scan_dxindex obj, level: level
26
+
32
27
  end
33
-
34
- @a = h.keys
28
+
29
+ #jr230620 @indexer.build
35
30
 
36
31
  end
37
32
 
38
- def search(keywords)
39
-
40
- # find out the keywords count for each entry found
41
- a = keywords.split.flat_map {|x| @a.grep /#{x}/i }.uniq.map do |entry|
42
- [entry, entry.scan(/#{keywords.split.join('|')}/).uniq.count]
43
- end
44
-
45
- # sort by keywords found per entry and then date
46
- a2 = a.sort do |x, x2|
47
- -([x.last, x.first[/^\d+/], ] <=> [x2.last, x2.first[/^\d+/]])
48
- end
33
+ def search(keywords, minchars: 3)
49
34
 
35
+ a2 = @indexer.search keywords.split(/[\s:"!\?\(\)£]+(?=[\w#_'-]+)/), \
36
+ minchars: minchars
50
37
  # format each result as a Hash object
51
- a3 = a2.map do |x|
38
+ a3 = a2.map do |date, title, url|
52
39
 
53
- line, _ = x
54
- puts 'line: ' + line.inspect if @debug
55
-
56
- rawtime, title = line.split(/ +/,2)
57
- puts 'title: ' + title.inspect if @debug
58
-
59
- {title: title, url: @h[line], date: Time.at(rawtime.to_i)}
40
+ {title: title, url: url, date: date}
60
41
 
61
42
  end
62
43
 
63
44
  puts 'a3: ' + a3.inspect if @debug
45
+
46
+ def a3.to_dx()
47
+ Dynarex.new('results/result(title, url, date)').import(self)
48
+ end
64
49
 
65
50
  return a3
66
51
 
67
52
  end
53
+
54
+ def tag_search(keywords)
55
+ r = @indexer.lookup *keywords.split(/[\W]+(?=[\w]+)/).map {|x| "#" + x}
56
+ r.map {|x| x.to_s[1..-1]}
57
+ end
68
58
 
69
59
  end
70
-
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dxtitle_search
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -11,54 +11,54 @@ cert_chain:
11
11
  - |
12
12
  -----BEGIN CERTIFICATE-----
13
13
  MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
- YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMTkwNzIzMjE0NjM1WhcN
15
- MjAwNzIyMjE0NjM1WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
- cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQC4fglD
17
- MJeEttUrAcVhZbHOvDxmej9AsrgXLSOdp7pVoUXQk1tEVjKH+1ibOhxxhx5Eiptk
18
- UGqqb/X67jXasjLr1wtY2dw/LKLAIoX5iPqE4vI0I0L5tGSF9i60f1KdgfIpgnx+
19
- fVNq32btdyoFuJBNG7yDBFjJ21ao5kyO9xpSUHnUDrrwyrdTZc9v9rG0kUEKzK/w
20
- 9obVvTWBmfIjBt3Vf5O2f88S+6boBJu6fDn0smnJEScHwTgj65yLN+ejPAanlqqt
21
- sxxxjLkdYl3LVDRUKXL09XcENdDFKwyiZu7kKJiVq7kvUcwMOBFobp2iWY9Yiam9
22
- HeRjOr8W35bZ3xScPLIVU08BYscUxoOHsMRpNPUY5I8SIYCLPJ7YvBO4ZlxJT5xQ
23
- E7M8aG/uKxwHKgHJu/3siL2cU/YtkVNS9yTxHdzD++90q3sC8uOnBLUMDTsKpV4l
24
- +jHMccyHbQXoazVQoVu7xNup2BpbiHALacarCLvu9GGJLrXkcX40ujB+TMcCAwEA
25
- AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUTUrPUS7w
26
- ILFIUp8s90zt8ljXtpkwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjEwMjIyMDAxMTU0WhcN
15
+ MjIwMjIyMDAxMTU0WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDxwG7T
17
+ /9FLpGJct6ptTTRa2UnYj9qeRp2KmjnMpONFP7Owl1yjh2auzyIfDChG7G2W9kZ4
18
+ eOPno39R2GJOxt5IVTxmjgBpAUl8xW3n629b2jE7al1GJZ89Jm3Fqs1rBeUaC0wB
19
+ DIX4SVwHqfzxvZTVLVZ5LPIKqxNiJe8eUIRRzzc0/XzyFTh3Sg4SH1ncm04vvxWg
20
+ piQyL6iUeUuirHjotchLINWzi2A4PgO63/YYM3tYPzbykkiBrE/iJHnPuFEjNfIj
21
+ +tVTvPB0wXdCDNf6yHr9cfy0t97eVGTbwv+UhWX9Vmz6/t4bi9X2xhmdp32y9CK6
22
+ s/I16J3oC70JvsTKOhDAM3OMQFjCpBbtyFYuJDbDbQKD/elGXTHvAmCi5skCNJzE
23
+ oq1l5aVpp3IFrZQfLEimeDDEq8ulEajlllpitq1vD+vycRSydybtAhMs/kK7DY7D
24
+ kVGOI1HDz5p+A6e8wyXoecdaQeswH3gd3nkCKfV7ohKXXywW8JWMEBgYj4ECAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQU1qWuKrKD
26
+ A90s8y+KBolbTgtLOP8wJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
27
  c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
- BgkqhkiG9w0BAQsFAAOCAYEAaEl3CKusWW3/wUutwDz0w0ojYuc48LOVrmGpZ8uH
29
- fi2wJx5Jt41/qxGWCz0qo/BFfFZjNllblywzhjIw80HnoGAR5bDaKF0fTdYsh++a
30
- Vyx6BkFVM5hYXIJklBT/PLR7JW/rOrFgPNrXM+BlWlFBGIQT8KaF7SfOYrDdqHG8
31
- uLlGPgLkR0U3tLfyWwF6548QRVz2Lm53dWeT0JGs974Dj+n1b8P3N0Uv1AFpetNB
32
- UTelmW2QjiLgxJxkFkRjp/Ep0s/puw3FbtaviQDBBRr9I/bMAE5QOtKa3k1e7m70
33
- 39utqhm1XpZeJamPbeuSFl6M3RRiwWACVlz9QjWVgA5JSf1GwCsc0nsa76pq7Yfj
34
- DtQaJilztwDXwMGWAJ5qDOiGcuv3Wmec+Nnr/9xoZRA9V/9GqXlWjqSSLgFHk/r1
35
- ODgOmcgAsTin0T9l0vWQJnj2OClh1pcC6tFnQXXcYuwTpbtM2v4OmUfUDvJmwQ42
36
- POydZUYZg49X9TlTDqPMKed0
28
+ BgkqhkiG9w0BAQsFAAOCAYEAHJI3akrM/ne2v1zYcZOy0POu/LLfcBM93BuQcfiN
29
+ ok256P7VWXSjU2TaR8/vLDK0dbZo5V08WCKk08yuqOdzOII7JSVh0QHzzgFTR7yS
30
+ RG3xDZtp6j4MXzoAkLd06rIL1O21GXGc49Wkc7XHeFytbRF8XhT29e4H4s5KMoJ1
31
+ l9Ks2oMc4USzKo7loCU+1q0Sg8BQEpS2+9RNTpG7MKDdneWXhb2o6UTQ3WIDpfEt
32
+ ImShbAmoVHcdMa3HZUH0Pa+pzr/KrM0sg2fa2trlJS9oDq9pZl/cQO6Ryl76lqKg
33
+ bQ7jxCYFron1UH6iS3bJBPfFmIWcio3zT8VmeqVPhXatDDnb00JijkO/WThobMxb
34
+ aZJQ2A0sZWHF9u5v9gda8mq21CID3YMPRzQ1u5o4jjyNhcVyjW29GvfD8+8D4s46
35
+ g10O6gIBUztpsNHwbBTi9fHgtis4E5dAPAo4uzo4MCKVwi72iZQ3E92ACYb9zlxA
36
+ g6CsTY0vUjhgfh9taSRj3qxd
37
37
  -----END CERTIFICATE-----
38
- date: 2019-07-23 00:00:00.000000000 Z
38
+ date: 2021-02-22 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
- name: dynarex
41
+ name: indexer101
42
42
  requirement: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '1.8'
46
+ version: '0.2'
47
47
  - - ">="
48
48
  - !ruby/object:Gem::Version
49
- version: 1.8.19
49
+ version: 0.2.4
50
50
  type: :runtime
51
51
  prerelease: false
52
52
  version_requirements: !ruby/object:Gem::Requirement
53
53
  requirements:
54
54
  - - "~>"
55
55
  - !ruby/object:Gem::Version
56
- version: '1.8'
56
+ version: '0.2'
57
57
  - - ">="
58
58
  - !ruby/object:Gem::Version
59
- version: 1.8.19
59
+ version: 0.2.4
60
60
  description:
61
- email: james@jamesrobertson.eu
61
+ email: digital.robertson@gmail.com
62
62
  executables: []
63
63
  extensions: []
64
64
  extra_rdoc_files: []
@@ -83,8 +83,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
83
83
  - !ruby/object:Gem::Version
84
84
  version: '0'
85
85
  requirements: []
86
- rubygems_version: 3.0.1
86
+ rubyforge_project:
87
+ rubygems_version: 2.7.10
87
88
  signing_key:
88
89
  specification_version: 4
89
- summary: Search title entries from plain text (derived from a Dynarex document).
90
+ summary: Originally intended to search title entries from plain text (derived from
91
+ a Dynarex document).
90
92
  test_files: []
metadata.gz.sig CHANGED
Binary file