dxtitle_search 0.2.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ce1fbac8b03c6ed8f54bd015c10f8d0cff189489daef845e2ba7334d51c2f43a
4
- data.tar.gz: 1097753eb43800ed8a1361e605fd4e51b427aef0fffcd67649cec7b04a0c12b7
3
+ metadata.gz: fb844f529ab06c2cc4e084e3f5af0f41ab4425d6dc7b9194b210963f5e37da39
4
+ data.tar.gz: 80a837d7414e45ce4dee39e2a4025ad8419b090d3ae3f057c4236d4d6961a0b5
5
5
  SHA512:
6
- metadata.gz: ef562657ad220e26e087f672a178475fb15c4e41874f7eacf270c1872e0461763a2b145b509541323467c48ad02e74c91b04d0c14cd15e515a22dd08de90644c
7
- data.tar.gz: 812c6c680a264edd2c0ae95fb63b86fb66485642ddb7e2d41696ac1ff51463e6e73cabd4c7fc29cfc8c9fcd9b6e49bb0374f1d1a2f6676e7bfc903afd515dedc
6
+ metadata.gz: 6482d7adffc9dc3ba09925d770ef6ad6aacc0ba771b2e703540d4ef7d34b009b49b60318e8450d617d64ed865bc55d70c8b4b447bb06a2f0c3be86f6fe1cc327
7
+ data.tar.gz: fd11a8b0080b6f48790d5524c14144bdb578761b6e47c6a346c9cb7d736998f5f7b862bac28d91e37bf94abce5b429ed61e28a681e6061683da97cddb8b51a8c
checksums.yaml.gz.sig CHANGED
Binary file
@@ -2,102 +2,99 @@
2
2
 
3
3
  # file: dxtitle_search.rb
4
4
 
5
- require 'dynarex'
5
+ require 'indexer101'
6
6
 
7
7
 
8
8
  class DxTitleSearch
9
+ using ColouredText
9
10
 
10
- def initialize(obj=nil, sources: obj, debug: false)
11
+ def initialize(obj=nil, sources: nil, level: 1, debug: false)
11
12
 
12
13
  @debug = debug
14
+ @indexer = Indexer101.new debug: debug
15
+ @level = level
13
16
 
14
17
  s = if sources then
15
-
18
+
19
+ puts "found sources".info if @debug
16
20
  dx = Dynarex.new(sources)
17
- dx.all.map {|x| read x.uri }.join
18
-
19
- elsif obj then
20
-
21
- # is it a Dynarex file location?
22
- if obj.lines.length < 2 then
23
-
24
- read obj
25
-
26
- else
27
-
28
- obj
29
-
30
- end
31
- end
32
-
33
- @h = h = s.lines.inject({}) do |r,x|
34
- key, value = x.split(/\s+(?=[^\s]+$)/,2)
35
- r.merge(key.rstrip => value)
21
+
22
+ puts 'before scan_dxindex' if @debug
23
+ a = dx.all.map(&:uri)
24
+ puts 'a: ' + a.inspect if @debug
25
+ @indexer.scan_dxindex a, level: level
26
+
27
+ elsif obj and (obj.is_a?(DxLite) or obj.is_a?(Dynarex)) or obj.lines.length < 2
28
+
29
+ @indexer.scan_dxindex obj, level: level
30
+
36
31
  end
37
32
 
38
- @a = h.keys
33
+ #jr230620 @indexer.build
39
34
 
40
35
  end
41
36
 
42
- def search(keywords)
43
-
44
- phrases = @a.grep /#{keywords}/i
37
+ def search(keywords, minchars: 3)
38
+
39
+ a2 = @indexer.search keywords.split(/[\s:"!\?\(\)£]+(?=[\w#_'-]+)/), \
40
+ minchars: minchars
41
+ # format each result as a Hash object
42
+ a3 = a2.map do |date, title, url|
43
+
44
+ {title: title, url: url, date: date}
45
45
 
46
- # find out the keywords count for each entry found
47
- a0 = keywords.split.flat_map do |x|
48
- next if @a.length < 2
49
- @a.grep /#{x}/i
50
46
  end
51
-
52
- a = a0.uniq.map do |entry|
53
- [entry, entry.scan(/#{keywords.split.join('|')}/).uniq.count]
47
+
48
+ puts 'a3: ' + a3.inspect if @debug
49
+
50
+ @dx = Dynarex.new('results/result(title, url, date)').import(a3)
51
+
52
+ def a3.to_dx()
53
+ Dynarex.new('results/result(title, url, date)').import(self)
54
54
  end
55
55
 
56
- # sort by keywords found per entry and then date
57
- #a2 = (phrases + a).uniq.sort do |x, x2|
58
- a2 = a.sort do |x, x2|
59
- -([x.last, x.first[/^\d+/], ] <=> [x2.last, x2.first[/^\d+/]])
56
+ def a3.to_tags()
57
+ a = self.map {|x| x[:title].scan(/(?<=#)(\w+)/)}.flatten
58
+ a.uniq.sort.map {|x| [x, a.count(x)]}
60
59
  end
61
60
 
62
- # format each result as a Hash object
63
- a3 = (phrases + a2).map do |x|
61
+ def a3.search(keywords)
64
62
 
65
- if x.length > 1 then
66
- line, _ = x
67
- else
68
- line = x
69
- end
70
-
71
- puts 'line: ' + line.inspect if @debug
72
-
73
- rawtime, title = line.split(/ +/,2)
74
- puts 'title: ' + title.inspect if @debug
63
+ dx = Dynarex.new('results/result(title, url, date)').import(self)
75
64
 
76
- {title: title, url: @h[line].chomp, date: Time.at(rawtime.to_i)}
65
+ level = keywords[0] == '#' ? 0 : 1
66
+ dts = DxTitleSearch.new dx, level: level
67
+ dts.search keywords
77
68
 
78
69
  end
79
70
 
80
- puts 'a3: ' + a3.inspect if @debug
71
+ def a3.tag_search(keywords)
81
72
 
82
- return a3
73
+ dx = Dynarex.new('results/result(title, url, date)').import(self)
74
+
75
+ level = keywords[0] == '#' ? 0 : 1
76
+ dts = DxTitleSearch.new dx, level: level
77
+ dts.tag_search keywords
78
+
79
+ end
80
+
81
+ return Array.new(a3)
83
82
 
84
83
  end
85
-
84
+
86
85
  def tag_search(keywords)
87
- a = @a.flat_map {|x| x.split(/#/,2).last.split(/\s*#/)}
88
- a.grep(/^#{keywords}/i).map(&:downcase).uniq
86
+ r = @indexer.lookup *keywords.split(/[\W]+(?=[\w]+)/).map {|x| "#" + x}
87
+ r.map {|x| x.to_s[1..-1]}
89
88
  end
90
-
91
- private
92
-
93
- def read(source)
94
89
 
95
- dx = Dynarex.new(source)
90
+ def to_tags()
91
+
92
+ a = @indexer.index.map do |key, value|
93
+ [key.to_s[1..-1], value.length]
94
+ end
95
+
96
+ a.sort_by(&:first)
96
97
 
97
- dx.all.map do |x|
98
- "%d %s %s" % [Time.parse(x.created).to_i, x.title, x.url]
99
- end.join("\n")
100
-
101
98
  end
102
99
 
103
100
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dxtitle_search
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -11,54 +11,54 @@ cert_chain:
11
11
  - |
12
12
  -----BEGIN CERTIFICATE-----
13
13
  MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
- YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMTkwNzIzMjE0NjM1WhcN
15
- MjAwNzIyMjE0NjM1WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
- cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQC4fglD
17
- MJeEttUrAcVhZbHOvDxmej9AsrgXLSOdp7pVoUXQk1tEVjKH+1ibOhxxhx5Eiptk
18
- UGqqb/X67jXasjLr1wtY2dw/LKLAIoX5iPqE4vI0I0L5tGSF9i60f1KdgfIpgnx+
19
- fVNq32btdyoFuJBNG7yDBFjJ21ao5kyO9xpSUHnUDrrwyrdTZc9v9rG0kUEKzK/w
20
- 9obVvTWBmfIjBt3Vf5O2f88S+6boBJu6fDn0smnJEScHwTgj65yLN+ejPAanlqqt
21
- sxxxjLkdYl3LVDRUKXL09XcENdDFKwyiZu7kKJiVq7kvUcwMOBFobp2iWY9Yiam9
22
- HeRjOr8W35bZ3xScPLIVU08BYscUxoOHsMRpNPUY5I8SIYCLPJ7YvBO4ZlxJT5xQ
23
- E7M8aG/uKxwHKgHJu/3siL2cU/YtkVNS9yTxHdzD++90q3sC8uOnBLUMDTsKpV4l
24
- +jHMccyHbQXoazVQoVu7xNup2BpbiHALacarCLvu9GGJLrXkcX40ujB+TMcCAwEA
25
- AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUTUrPUS7w
26
- ILFIUp8s90zt8ljXtpkwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjEwMjIyMDAxMTU0WhcN
15
+ MjIwMjIyMDAxMTU0WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDxwG7T
17
+ /9FLpGJct6ptTTRa2UnYj9qeRp2KmjnMpONFP7Owl1yjh2auzyIfDChG7G2W9kZ4
18
+ eOPno39R2GJOxt5IVTxmjgBpAUl8xW3n629b2jE7al1GJZ89Jm3Fqs1rBeUaC0wB
19
+ DIX4SVwHqfzxvZTVLVZ5LPIKqxNiJe8eUIRRzzc0/XzyFTh3Sg4SH1ncm04vvxWg
20
+ piQyL6iUeUuirHjotchLINWzi2A4PgO63/YYM3tYPzbykkiBrE/iJHnPuFEjNfIj
21
+ +tVTvPB0wXdCDNf6yHr9cfy0t97eVGTbwv+UhWX9Vmz6/t4bi9X2xhmdp32y9CK6
22
+ s/I16J3oC70JvsTKOhDAM3OMQFjCpBbtyFYuJDbDbQKD/elGXTHvAmCi5skCNJzE
23
+ oq1l5aVpp3IFrZQfLEimeDDEq8ulEajlllpitq1vD+vycRSydybtAhMs/kK7DY7D
24
+ kVGOI1HDz5p+A6e8wyXoecdaQeswH3gd3nkCKfV7ohKXXywW8JWMEBgYj4ECAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQU1qWuKrKD
26
+ A90s8y+KBolbTgtLOP8wJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
27
  c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
- BgkqhkiG9w0BAQsFAAOCAYEAaEl3CKusWW3/wUutwDz0w0ojYuc48LOVrmGpZ8uH
29
- fi2wJx5Jt41/qxGWCz0qo/BFfFZjNllblywzhjIw80HnoGAR5bDaKF0fTdYsh++a
30
- Vyx6BkFVM5hYXIJklBT/PLR7JW/rOrFgPNrXM+BlWlFBGIQT8KaF7SfOYrDdqHG8
31
- uLlGPgLkR0U3tLfyWwF6548QRVz2Lm53dWeT0JGs974Dj+n1b8P3N0Uv1AFpetNB
32
- UTelmW2QjiLgxJxkFkRjp/Ep0s/puw3FbtaviQDBBRr9I/bMAE5QOtKa3k1e7m70
33
- 39utqhm1XpZeJamPbeuSFl6M3RRiwWACVlz9QjWVgA5JSf1GwCsc0nsa76pq7Yfj
34
- DtQaJilztwDXwMGWAJ5qDOiGcuv3Wmec+Nnr/9xoZRA9V/9GqXlWjqSSLgFHk/r1
35
- ODgOmcgAsTin0T9l0vWQJnj2OClh1pcC6tFnQXXcYuwTpbtM2v4OmUfUDvJmwQ42
36
- POydZUYZg49X9TlTDqPMKed0
28
+ BgkqhkiG9w0BAQsFAAOCAYEAHJI3akrM/ne2v1zYcZOy0POu/LLfcBM93BuQcfiN
29
+ ok256P7VWXSjU2TaR8/vLDK0dbZo5V08WCKk08yuqOdzOII7JSVh0QHzzgFTR7yS
30
+ RG3xDZtp6j4MXzoAkLd06rIL1O21GXGc49Wkc7XHeFytbRF8XhT29e4H4s5KMoJ1
31
+ l9Ks2oMc4USzKo7loCU+1q0Sg8BQEpS2+9RNTpG7MKDdneWXhb2o6UTQ3WIDpfEt
32
+ ImShbAmoVHcdMa3HZUH0Pa+pzr/KrM0sg2fa2trlJS9oDq9pZl/cQO6Ryl76lqKg
33
+ bQ7jxCYFron1UH6iS3bJBPfFmIWcio3zT8VmeqVPhXatDDnb00JijkO/WThobMxb
34
+ aZJQ2A0sZWHF9u5v9gda8mq21CID3YMPRzQ1u5o4jjyNhcVyjW29GvfD8+8D4s46
35
+ g10O6gIBUztpsNHwbBTi9fHgtis4E5dAPAo4uzo4MCKVwi72iZQ3E92ACYb9zlxA
36
+ g6CsTY0vUjhgfh9taSRj3qxd
37
37
  -----END CERTIFICATE-----
38
- date: 2019-08-11 00:00:00.000000000 Z
38
+ date: 2022-01-25 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
- name: dynarex
41
+ name: indexer101
42
42
  requirement: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '1.8'
46
+ version: '0.2'
47
47
  - - ">="
48
48
  - !ruby/object:Gem::Version
49
- version: 1.8.19
49
+ version: 0.2.5
50
50
  type: :runtime
51
51
  prerelease: false
52
52
  version_requirements: !ruby/object:Gem::Requirement
53
53
  requirements:
54
54
  - - "~>"
55
55
  - !ruby/object:Gem::Version
56
- version: '1.8'
56
+ version: '0.2'
57
57
  - - ">="
58
58
  - !ruby/object:Gem::Version
59
- version: 1.8.19
59
+ version: 0.2.5
60
60
  description:
61
- email: james@jamesrobertson.eu
61
+ email: digital.robertson@gmail.com
62
62
  executables: []
63
63
  extensions: []
64
64
  extra_rdoc_files: []
@@ -83,8 +83,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
83
83
  - !ruby/object:Gem::Version
84
84
  version: '0'
85
85
  requirements: []
86
- rubygems_version: 3.0.1
86
+ rubyforge_project:
87
+ rubygems_version: 2.7.10
87
88
  signing_key:
88
89
  specification_version: 4
89
- summary: Search title entries from plain text (derived from a Dynarex document).
90
+ summary: Originally intended to search title entries from plain text (derived from
91
+ a Dynarex document).
90
92
  test_files: []
metadata.gz.sig CHANGED
Binary file