@cepharum/concrete-db 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -69,12 +69,13 @@ export function compare( left, right ) {
69
69
  * @param {number} maxWordSize maximum number of characters in extracted words
70
70
  * @param {boolean} ignoreCase set true to drop case of extracted words
71
71
  * @param {boolean} strict set false to have any non-string value converted to string instead of being ignored
72
+ * @param {string} wordPattern provides source of global unicode regexp matching a word to generate
72
73
  * @returns {Object<string,number>} map of each extracted word into its number of occurrences
73
74
  */
74
- export function spread( value, minSize = 10, minWordSize = 3, maxWordSize = Infinity, ignoreCase = true, strict = true ) {
75
+ export function spread( value, minSize = 10, minWordSize = 3, maxWordSize = Infinity, ignoreCase = true, strict = true, wordPattern = undefined ) {
75
76
  const result = {};
76
77
 
77
- for ( const word of generateWords( value, minSize, ignoreCase, strict ) ) {
78
+ for ( const word of generateWords( value, minSize, ignoreCase, strict, wordPattern ) ) {
78
79
  if ( word.length >= minWordSize && word.length <= maxWordSize ) {
79
80
  result[word] = ( result[word] || 0 ) + 1;
80
81
  }
@@ -102,9 +103,10 @@ export function fail( message ) {
102
103
  * @param {number} minSize minimum number of characters in a string to be considered for extracting terms
103
104
  * @param {boolean} ignoreCase set true to drop case of extracted terms
104
105
  * @param {boolean} strict set false to have any non-string value converted to string instead of being ignored
106
+ * @param {string} wordPattern provides source of global unicode regexp matching a word to generate
105
107
  * @returns {Generator<string|*, void, *>} iterator over flat sequence of terms
106
108
  */
107
- function *generateWords( source, minSize, ignoreCase, strict ) {
109
+ function *generateWords( source, minSize, ignoreCase, strict, wordPattern = undefined ) {
108
110
  let stream;
109
111
 
110
112
  if ( Array.isArray( source ) ) {
@@ -124,17 +126,17 @@ function *generateWords( source, minSize, ignoreCase, strict ) {
124
126
  return;
125
127
  }
126
128
 
127
- const ptn = /([\p{L}\p{N}_][\p{L}\p{N}_-]*)/gu;
129
+ const ptn = wordPattern ? new RegExp( wordPattern, "gu" ) : /[\p{L}\p{N}_][\p{L}\p{N}_-]*/gu;
128
130
  let match;
129
131
 
130
132
  while ( ( match = ptn.exec( string ) ) ) {
131
- yield ignoreCase ? match[1].toLocaleLowerCase() : match[1];
133
+ yield ignoreCase ? ( match[1] ?? match[0] ).toLocaleLowerCase() : match[1] ?? match[0];
132
134
  }
133
135
 
134
136
  return;
135
137
  }
136
138
 
137
139
  for ( const sub of stream ) {
138
- yield* generateWords( sub, minSize, ignoreCase, strict );
140
+ yield* generateWords( sub, minSize, ignoreCase, strict, wordPattern );
139
141
  }
140
142
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cepharum/concrete-db",
3
- "version": "0.2.0",
3
+ "version": "0.2.1",
4
4
  "description": "a read-only web database generator",
5
5
  "main": "lib/collector.mjs",
6
6
  "types": "concrete-db.d.ts",