@cepharum/concrete-db 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/term-functions.mjs +8 -6
- package/package.json +1 -1
package/lib/term-functions.mjs
CHANGED
|
@@ -69,12 +69,13 @@ export function compare( left, right ) {
|
|
|
69
69
|
* @param {number} maxWordSize maximum number of characters in extracted words
|
|
70
70
|
* @param {boolean} ignoreCase set true to drop case of extracted words
|
|
71
71
|
* @param {boolean} strict set false to have any non-string value converted to string instead of being ignored
|
|
72
|
+
* @param {string} wordPattern provides source of global unicode regexp matching a word to generate
|
|
72
73
|
* @returns {Object<string,number>} map of extracted words into each word's number of occurrences
|
|
73
74
|
*/
|
|
74
|
-
export function spread( value, minSize = 10, minWordSize = 3, maxWordSize = Infinity, ignoreCase = true, strict = true ) {
|
|
75
|
+
export function spread( value, minSize = 10, minWordSize = 3, maxWordSize = Infinity, ignoreCase = true, strict = true, wordPattern = undefined ) {
|
|
75
76
|
const result = {};
|
|
76
77
|
|
|
77
|
-
for ( const word of generateWords( value, minSize, ignoreCase, strict ) ) {
|
|
78
|
+
for ( const word of generateWords( value, minSize, ignoreCase, strict, wordPattern ) ) {
|
|
78
79
|
if ( word.length >= minWordSize && word.length <= maxWordSize ) {
|
|
79
80
|
result[word] = ( result[word] || 0 ) + 1;
|
|
80
81
|
}
|
|
@@ -102,9 +103,10 @@ export function fail( message ) {
|
|
|
102
103
|
* @param {number} minSize minimum number of characters in a string to be considered for extracting terms
|
|
103
104
|
* @param {boolean} ignoreCase set true to drop case of extracted terms
|
|
104
105
|
* @param {boolean} strict set false to have any non-string value converted to string instead of being ignored
|
|
106
|
+
* @param {string} wordPattern provides source of global unicode regexp matching a word to generate
|
|
105
107
|
* @returns {Generator<string|*, void, *>} iterator over flat sequence of terms
|
|
106
108
|
*/
|
|
107
|
-
function *generateWords( source, minSize, ignoreCase, strict ) {
|
|
109
|
+
function *generateWords( source, minSize, ignoreCase, strict, wordPattern = undefined ) {
|
|
108
110
|
let stream;
|
|
109
111
|
|
|
110
112
|
if ( Array.isArray( source ) ) {
|
|
@@ -124,17 +126,17 @@ function *generateWords( source, minSize, ignoreCase, strict ) {
|
|
|
124
126
|
return;
|
|
125
127
|
}
|
|
126
128
|
|
|
127
|
-
const ptn = /
|
|
129
|
+
const ptn = wordPattern ? new RegExp( wordPattern, "gu" ) : /[\p{L}\p{N}_][\p{L}\p{N}_-]*/gu;
|
|
128
130
|
let match;
|
|
129
131
|
|
|
130
132
|
while ( ( match = ptn.exec( string ) ) ) {
|
|
131
|
-
yield ignoreCase ? match[1].toLocaleLowerCase() : match[1];
|
|
133
|
+
yield ignoreCase ? ( match[1] ?? match[0] ).toLocaleLowerCase() : match[1] ?? match[0];
|
|
132
134
|
}
|
|
133
135
|
|
|
134
136
|
return;
|
|
135
137
|
}
|
|
136
138
|
|
|
137
139
|
for ( const sub of stream ) {
|
|
138
|
-
yield* generateWords( sub, minSize, ignoreCase, strict );
|
|
140
|
+
yield* generateWords( sub, minSize, ignoreCase, strict, wordPattern );
|
|
139
141
|
}
|
|
140
142
|
}
|