dxtitle_search 0.2.2 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ce1fbac8b03c6ed8f54bd015c10f8d0cff189489daef845e2ba7334d51c2f43a
4
- data.tar.gz: 1097753eb43800ed8a1361e605fd4e51b427aef0fffcd67649cec7b04a0c12b7
3
+ metadata.gz: fb844f529ab06c2cc4e084e3f5af0f41ab4425d6dc7b9194b210963f5e37da39
4
+ data.tar.gz: 80a837d7414e45ce4dee39e2a4025ad8419b090d3ae3f057c4236d4d6961a0b5
5
5
  SHA512:
6
- metadata.gz: ef562657ad220e26e087f672a178475fb15c4e41874f7eacf270c1872e0461763a2b145b509541323467c48ad02e74c91b04d0c14cd15e515a22dd08de90644c
7
- data.tar.gz: 812c6c680a264edd2c0ae95fb63b86fb66485642ddb7e2d41696ac1ff51463e6e73cabd4c7fc29cfc8c9fcd9b6e49bb0374f1d1a2f6676e7bfc903afd515dedc
6
+ metadata.gz: 6482d7adffc9dc3ba09925d770ef6ad6aacc0ba771b2e703540d4ef7d34b009b49b60318e8450d617d64ed865bc55d70c8b4b447bb06a2f0c3be86f6fe1cc327
7
+ data.tar.gz: fd11a8b0080b6f48790d5524c14144bdb578761b6e47c6a346c9cb7d736998f5f7b862bac28d91e37bf94abce5b429ed61e28a681e6061683da97cddb8b51a8c
checksums.yaml.gz.sig CHANGED
Binary file
@@ -2,102 +2,99 @@
2
2
 
3
3
  # file: dxtitle_search.rb
4
4
 
5
- require 'dynarex'
5
+ require 'indexer101'
6
6
 
7
7
 
8
8
  class DxTitleSearch
9
+ using ColouredText
9
10
 
10
- def initialize(obj=nil, sources: obj, debug: false)
11
+ def initialize(obj=nil, sources: nil, level: 1, debug: false)
11
12
 
12
13
  @debug = debug
14
+ @indexer = Indexer101.new debug: debug
15
+ @level = level
13
16
 
14
17
  s = if sources then
15
-
18
+
19
+ puts "found sources".info if @debug
16
20
  dx = Dynarex.new(sources)
17
- dx.all.map {|x| read x.uri }.join
18
-
19
- elsif obj then
20
-
21
- # is it a Dynarex file location?
22
- if obj.lines.length < 2 then
23
-
24
- read obj
25
-
26
- else
27
-
28
- obj
29
-
30
- end
31
- end
32
-
33
- @h = h = s.lines.inject({}) do |r,x|
34
- key, value = x.split(/\s+(?=[^\s]+$)/,2)
35
- r.merge(key.rstrip => value)
21
+
22
+ puts 'before scan_dxindex' if @debug
23
+ a = dx.all.map(&:uri)
24
+ puts 'a: ' + a.inspect if @debug
25
+ @indexer.scan_dxindex a, level: level
26
+
27
+ elsif obj and (obj.is_a?(DxLite) or obj.is_a?(Dynarex)) or obj.lines.length < 2
28
+
29
+ @indexer.scan_dxindex obj, level: level
30
+
36
31
  end
37
32
 
38
- @a = h.keys
33
+ #jr230620 @indexer.build
39
34
 
40
35
  end
41
36
 
42
- def search(keywords)
43
-
44
- phrases = @a.grep /#{keywords}/i
37
+ def search(keywords, minchars: 3)
38
+
39
+ a2 = @indexer.search keywords.split(/[\s:"!\?\(\)£]+(?=[\w#_'-]+)/), \
40
+ minchars: minchars
41
+ # format each result as a Hash object
42
+ a3 = a2.map do |date, title, url|
43
+
44
+ {title: title, url: url, date: date}
45
45
 
46
- # find out the keywords count for each entry found
47
- a0 = keywords.split.flat_map do |x|
48
- next if @a.length < 2
49
- @a.grep /#{x}/i
50
46
  end
51
-
52
- a = a0.uniq.map do |entry|
53
- [entry, entry.scan(/#{keywords.split.join('|')}/).uniq.count]
47
+
48
+ puts 'a3: ' + a3.inspect if @debug
49
+
50
+ @dx = Dynarex.new('results/result(title, url, date)').import(a3)
51
+
52
+ def a3.to_dx()
53
+ Dynarex.new('results/result(title, url, date)').import(self)
54
54
  end
55
55
 
56
- # sort by keywords found per entry and then date
57
- #a2 = (phrases + a).uniq.sort do |x, x2|
58
- a2 = a.sort do |x, x2|
59
- -([x.last, x.first[/^\d+/], ] <=> [x2.last, x2.first[/^\d+/]])
56
+ def a3.to_tags()
57
+ a = self.map {|x| x[:title].scan(/(?<=#)(\w+)/)}.flatten
58
+ a.uniq.sort.map {|x| [x, a.count(x)]}
60
59
  end
61
60
 
62
- # format each result as a Hash object
63
- a3 = (phrases + a2).map do |x|
61
+ def a3.search(keywords)
64
62
 
65
- if x.length > 1 then
66
- line, _ = x
67
- else
68
- line = x
69
- end
70
-
71
- puts 'line: ' + line.inspect if @debug
72
-
73
- rawtime, title = line.split(/ +/,2)
74
- puts 'title: ' + title.inspect if @debug
63
+ dx = Dynarex.new('results/result(title, url, date)').import(self)
75
64
 
76
- {title: title, url: @h[line].chomp, date: Time.at(rawtime.to_i)}
65
+ level = keywords[0] == '#' ? 0 : 1
66
+ dts = DxTitleSearch.new dx, level: level
67
+ dts.search keywords
77
68
 
78
69
  end
79
70
 
80
- puts 'a3: ' + a3.inspect if @debug
71
+ def a3.tag_search(keywords)
81
72
 
82
- return a3
73
+ dx = Dynarex.new('results/result(title, url, date)').import(self)
74
+
75
+ level = keywords[0] == '#' ? 0 : 1
76
+ dts = DxTitleSearch.new dx, level: level
77
+ dts.tag_search keywords
78
+
79
+ end
80
+
81
+ return Array.new(a3)
83
82
 
84
83
  end
85
-
84
+
86
85
  def tag_search(keywords)
87
- a = @a.flat_map {|x| x.split(/#/,2).last.split(/\s*#/)}
88
- a.grep(/^#{keywords}/i).map(&:downcase).uniq
86
+ r = @indexer.lookup *keywords.split(/[\W]+(?=[\w]+)/).map {|x| "#" + x}
87
+ r.map {|x| x.to_s[1..-1]}
89
88
  end
90
-
91
- private
92
-
93
- def read(source)
94
89
 
95
- dx = Dynarex.new(source)
90
+ def to_tags()
91
+
92
+ a = @indexer.index.map do |key, value|
93
+ [key.to_s[1..-1], value.length]
94
+ end
95
+
96
+ a.sort_by(&:first)
96
97
 
97
- dx.all.map do |x|
98
- "%d %s %s" % [Time.parse(x.created).to_i, x.title, x.url]
99
- end.join("\n")
100
-
101
98
  end
102
99
 
103
100
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dxtitle_search
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -11,54 +11,54 @@ cert_chain:
11
11
  - |
12
12
  -----BEGIN CERTIFICATE-----
13
13
  MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
- YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMTkwNzIzMjE0NjM1WhcN
15
- MjAwNzIyMjE0NjM1WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
- cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQC4fglD
17
- MJeEttUrAcVhZbHOvDxmej9AsrgXLSOdp7pVoUXQk1tEVjKH+1ibOhxxhx5Eiptk
18
- UGqqb/X67jXasjLr1wtY2dw/LKLAIoX5iPqE4vI0I0L5tGSF9i60f1KdgfIpgnx+
19
- fVNq32btdyoFuJBNG7yDBFjJ21ao5kyO9xpSUHnUDrrwyrdTZc9v9rG0kUEKzK/w
20
- 9obVvTWBmfIjBt3Vf5O2f88S+6boBJu6fDn0smnJEScHwTgj65yLN+ejPAanlqqt
21
- sxxxjLkdYl3LVDRUKXL09XcENdDFKwyiZu7kKJiVq7kvUcwMOBFobp2iWY9Yiam9
22
- HeRjOr8W35bZ3xScPLIVU08BYscUxoOHsMRpNPUY5I8SIYCLPJ7YvBO4ZlxJT5xQ
23
- E7M8aG/uKxwHKgHJu/3siL2cU/YtkVNS9yTxHdzD++90q3sC8uOnBLUMDTsKpV4l
24
- +jHMccyHbQXoazVQoVu7xNup2BpbiHALacarCLvu9GGJLrXkcX40ujB+TMcCAwEA
25
- AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUTUrPUS7w
26
- ILFIUp8s90zt8ljXtpkwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjEwMjIyMDAxMTU0WhcN
15
+ MjIwMjIyMDAxMTU0WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDxwG7T
17
+ /9FLpGJct6ptTTRa2UnYj9qeRp2KmjnMpONFP7Owl1yjh2auzyIfDChG7G2W9kZ4
18
+ eOPno39R2GJOxt5IVTxmjgBpAUl8xW3n629b2jE7al1GJZ89Jm3Fqs1rBeUaC0wB
19
+ DIX4SVwHqfzxvZTVLVZ5LPIKqxNiJe8eUIRRzzc0/XzyFTh3Sg4SH1ncm04vvxWg
20
+ piQyL6iUeUuirHjotchLINWzi2A4PgO63/YYM3tYPzbykkiBrE/iJHnPuFEjNfIj
21
+ +tVTvPB0wXdCDNf6yHr9cfy0t97eVGTbwv+UhWX9Vmz6/t4bi9X2xhmdp32y9CK6
22
+ s/I16J3oC70JvsTKOhDAM3OMQFjCpBbtyFYuJDbDbQKD/elGXTHvAmCi5skCNJzE
23
+ oq1l5aVpp3IFrZQfLEimeDDEq8ulEajlllpitq1vD+vycRSydybtAhMs/kK7DY7D
24
+ kVGOI1HDz5p+A6e8wyXoecdaQeswH3gd3nkCKfV7ohKXXywW8JWMEBgYj4ECAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQU1qWuKrKD
26
+ A90s8y+KBolbTgtLOP8wJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
27
  c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
- BgkqhkiG9w0BAQsFAAOCAYEAaEl3CKusWW3/wUutwDz0w0ojYuc48LOVrmGpZ8uH
29
- fi2wJx5Jt41/qxGWCz0qo/BFfFZjNllblywzhjIw80HnoGAR5bDaKF0fTdYsh++a
30
- Vyx6BkFVM5hYXIJklBT/PLR7JW/rOrFgPNrXM+BlWlFBGIQT8KaF7SfOYrDdqHG8
31
- uLlGPgLkR0U3tLfyWwF6548QRVz2Lm53dWeT0JGs974Dj+n1b8P3N0Uv1AFpetNB
32
- UTelmW2QjiLgxJxkFkRjp/Ep0s/puw3FbtaviQDBBRr9I/bMAE5QOtKa3k1e7m70
33
- 39utqhm1XpZeJamPbeuSFl6M3RRiwWACVlz9QjWVgA5JSf1GwCsc0nsa76pq7Yfj
34
- DtQaJilztwDXwMGWAJ5qDOiGcuv3Wmec+Nnr/9xoZRA9V/9GqXlWjqSSLgFHk/r1
35
- ODgOmcgAsTin0T9l0vWQJnj2OClh1pcC6tFnQXXcYuwTpbtM2v4OmUfUDvJmwQ42
36
- POydZUYZg49X9TlTDqPMKed0
28
+ BgkqhkiG9w0BAQsFAAOCAYEAHJI3akrM/ne2v1zYcZOy0POu/LLfcBM93BuQcfiN
29
+ ok256P7VWXSjU2TaR8/vLDK0dbZo5V08WCKk08yuqOdzOII7JSVh0QHzzgFTR7yS
30
+ RG3xDZtp6j4MXzoAkLd06rIL1O21GXGc49Wkc7XHeFytbRF8XhT29e4H4s5KMoJ1
31
+ l9Ks2oMc4USzKo7loCU+1q0Sg8BQEpS2+9RNTpG7MKDdneWXhb2o6UTQ3WIDpfEt
32
+ ImShbAmoVHcdMa3HZUH0Pa+pzr/KrM0sg2fa2trlJS9oDq9pZl/cQO6Ryl76lqKg
33
+ bQ7jxCYFron1UH6iS3bJBPfFmIWcio3zT8VmeqVPhXatDDnb00JijkO/WThobMxb
34
+ aZJQ2A0sZWHF9u5v9gda8mq21CID3YMPRzQ1u5o4jjyNhcVyjW29GvfD8+8D4s46
35
+ g10O6gIBUztpsNHwbBTi9fHgtis4E5dAPAo4uzo4MCKVwi72iZQ3E92ACYb9zlxA
36
+ g6CsTY0vUjhgfh9taSRj3qxd
37
37
  -----END CERTIFICATE-----
38
- date: 2019-08-11 00:00:00.000000000 Z
38
+ date: 2022-01-25 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
- name: dynarex
41
+ name: indexer101
42
42
  requirement: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '1.8'
46
+ version: '0.2'
47
47
  - - ">="
48
48
  - !ruby/object:Gem::Version
49
- version: 1.8.19
49
+ version: 0.2.5
50
50
  type: :runtime
51
51
  prerelease: false
52
52
  version_requirements: !ruby/object:Gem::Requirement
53
53
  requirements:
54
54
  - - "~>"
55
55
  - !ruby/object:Gem::Version
56
- version: '1.8'
56
+ version: '0.2'
57
57
  - - ">="
58
58
  - !ruby/object:Gem::Version
59
- version: 1.8.19
59
+ version: 0.2.5
60
60
  description:
61
- email: james@jamesrobertson.eu
61
+ email: digital.robertson@gmail.com
62
62
  executables: []
63
63
  extensions: []
64
64
  extra_rdoc_files: []
@@ -83,8 +83,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
83
83
  - !ruby/object:Gem::Version
84
84
  version: '0'
85
85
  requirements: []
86
- rubygems_version: 3.0.1
86
+ rubyforge_project:
87
+ rubygems_version: 2.7.10
87
88
  signing_key:
88
89
  specification_version: 4
89
- summary: Search title entries from plain text (derived from a Dynarex document).
90
+ summary: Originally intended to search title entries from plain text (derived from
91
+ a Dynarex document).
90
92
  test_files: []
metadata.gz.sig CHANGED
Binary file