dxtitle_search 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/lib/dxtitle_search.rb +20 -68
- metadata +10 -9
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 74ea6c681b08bebcc9fa822a185562a19a262544abf12e674a150428470c8396
|
4
|
+
data.tar.gz: 1ea0ffc0e631086452ef97e51b5e154f21169d4f4274f5538a4ce7c4e476b67b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4e4e998c4df64f0e18ac5556ad197d13ecec2804c9abc7f4ded1a2c7a0a1e89f5c73a73da7ffd8ed418ee5f2eea23233b675eb79412cf68e09c3bb0e333d2def
|
7
|
+
data.tar.gz: 45fc402f35aa22ff4bc66ecf752f8bb946acfeea84106513b382ba921b41c5f68882eb5b415146e13f40be5b28bdb238dc49f68c3ecdd41e7aab5662af00df87
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data.tar.gz.sig
CHANGED
Binary file
|
data/lib/dxtitle_search.rb
CHANGED
@@ -2,102 +2,54 @@
|
|
2
2
|
|
3
3
|
# file: dxtitle_search.rb
|
4
4
|
|
5
|
-
require '
|
5
|
+
require 'indexer101'
|
6
6
|
|
7
7
|
|
8
8
|
class DxTitleSearch
|
9
9
|
|
10
|
-
def initialize(obj=nil, sources:
|
10
|
+
def initialize(obj=nil, sources: nil, debug: false)
|
11
11
|
|
12
12
|
@debug = debug
|
13
|
+
@indexer = Indexer101.new debug: debug
|
13
14
|
|
14
15
|
s = if sources then
|
15
16
|
|
16
|
-
dx = Dynarex.new(sources)
|
17
|
-
dx.all.map
|
17
|
+
dx = Dynarex.new(sources)
|
18
|
+
@indexer.scan_dxindex dx.all.map(&:uri), level: 1
|
18
19
|
|
19
|
-
elsif obj
|
20
|
-
|
21
|
-
# is it a Dynarex file location?
|
22
|
-
if obj.lines.length < 2 then
|
20
|
+
elsif obj and obj.lines.length < 2
|
23
21
|
|
24
|
-
|
25
|
-
|
26
|
-
else
|
27
|
-
|
28
|
-
obj
|
22
|
+
@indexer.scan_dxindex obj, level: 1
|
29
23
|
|
30
|
-
end
|
31
24
|
end
|
32
25
|
|
33
|
-
@
|
34
|
-
key, value = x.split(/\s+(?=[^\s]+$)/,2)
|
35
|
-
r.merge(key.rstrip => value)
|
36
|
-
end
|
37
|
-
|
38
|
-
@a = h.keys
|
26
|
+
@indexer.build
|
39
27
|
|
40
28
|
end
|
41
29
|
|
42
|
-
def search(keywords)
|
43
|
-
|
44
|
-
phrases = @a.grep /#{keywords}/i
|
45
|
-
|
46
|
-
# find out the keywords count for each entry found
|
47
|
-
a0 = keywords.split.flat_map do |x|
|
48
|
-
next if @a.length < 2
|
49
|
-
@a.grep /#{x}/i
|
50
|
-
end
|
51
|
-
|
52
|
-
a = a0.uniq.map do |entry|
|
53
|
-
[entry, entry.scan(/#{keywords.split.join('|')}/).uniq.count]
|
54
|
-
end
|
55
|
-
|
56
|
-
# sort by keywords found per entry and then date
|
57
|
-
#a2 = (phrases + a).uniq.sort do |x, x2|
|
58
|
-
a2 = a.sort do |x, x2|
|
59
|
-
-([x.last, x.first[/^\d+/], ] <=> [x2.last, x2.first[/^\d+/]])
|
60
|
-
end
|
30
|
+
def search(keywords)
|
61
31
|
|
32
|
+
a2 = @indexer.search keywords.split(/[\s:"!\?\(\)£]+(?=[\w#_'-]+)/)
|
62
33
|
# format each result as a Hash object
|
63
|
-
a3 =
|
34
|
+
a3 = a2.map do |date, title, url|
|
64
35
|
|
65
|
-
|
66
|
-
line, _ = x
|
67
|
-
else
|
68
|
-
line = x
|
69
|
-
end
|
70
|
-
|
71
|
-
puts 'line: ' + line.inspect if @debug
|
72
|
-
|
73
|
-
rawtime, title = line.split(/ +/,2)
|
74
|
-
puts 'title: ' + title.inspect if @debug
|
75
|
-
|
76
|
-
{title: title, url: @h[line].chomp, date: Time.at(rawtime.to_i)}
|
36
|
+
{title: title, url: url, date: date}
|
77
37
|
|
78
38
|
end
|
79
39
|
|
80
40
|
puts 'a3: ' + a3.inspect if @debug
|
41
|
+
|
42
|
+
def a3.to_dx()
|
43
|
+
Dynarex.new('results/result(title, url, date)').import(self)
|
44
|
+
end
|
81
45
|
|
82
46
|
return a3
|
83
47
|
|
84
48
|
end
|
85
49
|
|
86
|
-
def tag_search(keywords)
|
87
|
-
|
88
|
-
|
89
|
-
end
|
90
|
-
|
91
|
-
private
|
92
|
-
|
93
|
-
def read(source)
|
94
|
-
|
95
|
-
dx = Dynarex.new(source)
|
96
|
-
|
97
|
-
dx.all.map do |x|
|
98
|
-
"%d %s %s" % [Time.parse(x.created).to_i, x.title, x.url]
|
99
|
-
end.join("\n")
|
100
|
-
|
101
|
-
end
|
50
|
+
def tag_search(keywords)
|
51
|
+
r = @indexer.lookup *keywords.split(/[\W]+(?=[\w]+)/).map {|x| "#" + x}
|
52
|
+
r.map {|x| x.to_s[1..-1]}
|
53
|
+
end
|
102
54
|
|
103
55
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dxtitle_search
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Robertson
|
@@ -35,28 +35,28 @@ cert_chain:
|
|
35
35
|
ODgOmcgAsTin0T9l0vWQJnj2OClh1pcC6tFnQXXcYuwTpbtM2v4OmUfUDvJmwQ42
|
36
36
|
POydZUYZg49X9TlTDqPMKed0
|
37
37
|
-----END CERTIFICATE-----
|
38
|
-
date: 2019-
|
38
|
+
date: 2019-11-12 00:00:00.000000000 Z
|
39
39
|
dependencies:
|
40
40
|
- !ruby/object:Gem::Dependency
|
41
|
-
name:
|
41
|
+
name: indexer101
|
42
42
|
requirement: !ruby/object:Gem::Requirement
|
43
43
|
requirements:
|
44
44
|
- - "~>"
|
45
45
|
- !ruby/object:Gem::Version
|
46
|
-
version: '
|
46
|
+
version: '0.2'
|
47
47
|
- - ">="
|
48
48
|
- !ruby/object:Gem::Version
|
49
|
-
version:
|
49
|
+
version: 0.2.1
|
50
50
|
type: :runtime
|
51
51
|
prerelease: false
|
52
52
|
version_requirements: !ruby/object:Gem::Requirement
|
53
53
|
requirements:
|
54
54
|
- - "~>"
|
55
55
|
- !ruby/object:Gem::Version
|
56
|
-
version: '
|
56
|
+
version: '0.2'
|
57
57
|
- - ">="
|
58
58
|
- !ruby/object:Gem::Version
|
59
|
-
version:
|
59
|
+
version: 0.2.1
|
60
60
|
description:
|
61
61
|
email: james@jamesrobertson.eu
|
62
62
|
executables: []
|
@@ -83,8 +83,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
83
83
|
- !ruby/object:Gem::Version
|
84
84
|
version: '0'
|
85
85
|
requirements: []
|
86
|
-
rubygems_version: 3.0.
|
86
|
+
rubygems_version: 3.0.3
|
87
87
|
signing_key:
|
88
88
|
specification_version: 4
|
89
|
-
summary:
|
89
|
+
summary: Originally intended to search title entries from plain text (derived from
|
90
|
+
a Dynarex document).
|
90
91
|
test_files: []
|
metadata.gz.sig
CHANGED
Binary file
|