amalgalite 1.4.1-x86-mingw32 → 1.5.0-x86-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.md +5 -0
- data/Manifest.txt +1 -1
- data/examples/fts5.rb +152 -0
- data/ext/amalgalite/c/extconf.rb +16 -2
- data/ext/amalgalite/c/sqlite3.c +25266 -2638
- data/ext/amalgalite/c/sqlite3.h +594 -11
- data/ext/amalgalite/c/sqlite3ext.h +10 -3
- data/lib/amalgalite/1.8/amalgalite.so +0 -0
- data/lib/amalgalite/1.9/amalgalite.so +0 -0
- data/lib/amalgalite/2.0/amalgalite.so +0 -0
- data/lib/amalgalite/2.1/amalgalite.so +0 -0
- data/lib/amalgalite/2.2/amalgalite.so +0 -0
- data/lib/amalgalite/version.rb +1 -1
- data/spec/sqlite3/version_spec.rb +6 -6
- metadata +3 -3
- data/examples/fts3.rb +0 -144
data/ext/amalgalite/c/sqlite3.h
CHANGED
@@ -111,9 +111,9 @@ extern "C" {
|
|
111
111
|
** [sqlite3_libversion_number()], [sqlite3_sourceid()],
|
112
112
|
** [sqlite_version()] and [sqlite_source_id()].
|
113
113
|
*/
|
114
|
-
#define SQLITE_VERSION "3.
|
115
|
-
#define SQLITE_VERSION_NUMBER
|
116
|
-
#define SQLITE_SOURCE_ID "2015-
|
114
|
+
#define SQLITE_VERSION "3.9.2"
|
115
|
+
#define SQLITE_VERSION_NUMBER 3009002
|
116
|
+
#define SQLITE_SOURCE_ID "2015-11-02 18:31:45 bda77dda9697c463c3d0704014d51627fceee328"
|
117
117
|
|
118
118
|
/*
|
119
119
|
** CAPI3REF: Run-Time Library Version Numbers
|
@@ -124,7 +124,7 @@ extern "C" {
|
|
124
124
|
** but are associated with the library instead of the header file. ^(Cautious
|
125
125
|
** programmers might include assert() statements in their application to
|
126
126
|
** verify that values returned by these interfaces match the macros in
|
127
|
-
** the header, and thus
|
127
|
+
** the header, and thus ensure that the application is
|
128
128
|
** compiled with matching library and header files.
|
129
129
|
**
|
130
130
|
** <blockquote><pre>
|
@@ -374,7 +374,7 @@ typedef int (*sqlite3_callback)(void*,int,char**, char**);
|
|
374
374
|
** Restrictions:
|
375
375
|
**
|
376
376
|
** <ul>
|
377
|
-
** <li> The application must
|
377
|
+
** <li> The application must ensure that the 1st parameter to sqlite3_exec()
|
378
378
|
** is a valid and open [database connection].
|
379
379
|
** <li> The application must not close the [database connection] specified by
|
380
380
|
** the 1st parameter to sqlite3_exec() while sqlite3_exec() is running.
|
@@ -477,6 +477,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_exec(
|
|
477
477
|
#define SQLITE_IOERR_MMAP (SQLITE_IOERR | (24<<8))
|
478
478
|
#define SQLITE_IOERR_GETTEMPPATH (SQLITE_IOERR | (25<<8))
|
479
479
|
#define SQLITE_IOERR_CONVPATH (SQLITE_IOERR | (26<<8))
|
480
|
+
#define SQLITE_IOERR_VNODE (SQLITE_IOERR | (27<<8))
|
480
481
|
#define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8))
|
481
482
|
#define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8))
|
482
483
|
#define SQLITE_BUSY_SNAPSHOT (SQLITE_BUSY | (2<<8))
|
@@ -1366,9 +1367,11 @@ SQLITE_API int SQLITE_STDCALL sqlite3_os_end(void);
|
|
1366
1367
|
** applications and so this routine is usually not necessary. It is
|
1367
1368
|
** provided to support rare applications with unusual needs.
|
1368
1369
|
**
|
1369
|
-
** The sqlite3_config() interface is not threadsafe.
|
1370
|
-
** must
|
1371
|
-
** threads while sqlite3_config() is running
|
1370
|
+
** <b>The sqlite3_config() interface is not threadsafe. The application
|
1371
|
+
** must ensure that no other SQLite interfaces are invoked by other
|
1372
|
+
** threads while sqlite3_config() is running.</b>
|
1373
|
+
**
|
1374
|
+
** The sqlite3_config() interface
|
1372
1375
|
** may only be invoked prior to library initialization using
|
1373
1376
|
** [sqlite3_initialize()] or after shutdown by [sqlite3_shutdown()].
|
1374
1377
|
** ^If sqlite3_config() is called after [sqlite3_initialize()] and before
|
@@ -3373,7 +3376,8 @@ SQLITE_API int SQLITE_STDCALL sqlite3_stmt_readonly(sqlite3_stmt *pStmt);
|
|
3373
3376
|
**
|
3374
3377
|
** ^The sqlite3_stmt_busy(S) interface returns true (non-zero) if the
|
3375
3378
|
** [prepared statement] S has been stepped at least once using
|
3376
|
-
** [sqlite3_step(S)] but has
|
3379
|
+
** [sqlite3_step(S)] but has neither run to completion (returned
|
3380
|
+
** [SQLITE_DONE] from [sqlite3_step(S)]) nor
|
3377
3381
|
** been reset using [sqlite3_reset(S)]. ^The sqlite3_stmt_busy(S)
|
3378
3382
|
** interface returns false if S is a NULL pointer. If S is not a
|
3379
3383
|
** NULL pointer and is not a pointer to a valid [prepared statement]
|
@@ -3626,7 +3630,7 @@ SQLITE_API const char *SQLITE_STDCALL sqlite3_bind_parameter_name(sqlite3_stmt*,
|
|
3626
3630
|
**
|
3627
3631
|
** See also: [sqlite3_bind_blob|sqlite3_bind()],
|
3628
3632
|
** [sqlite3_bind_parameter_count()], and
|
3629
|
-
** [
|
3633
|
+
** [sqlite3_bind_parameter_name()].
|
3630
3634
|
*/
|
3631
3635
|
SQLITE_API int SQLITE_STDCALL sqlite3_bind_parameter_index(sqlite3_stmt*, const char *zName);
|
3632
3636
|
|
@@ -4355,6 +4359,22 @@ SQLITE_API const void *SQLITE_STDCALL sqlite3_value_text16be(sqlite3_value*);
|
|
4355
4359
|
SQLITE_API int SQLITE_STDCALL sqlite3_value_type(sqlite3_value*);
|
4356
4360
|
SQLITE_API int SQLITE_STDCALL sqlite3_value_numeric_type(sqlite3_value*);
|
4357
4361
|
|
4362
|
+
/*
|
4363
|
+
** CAPI3REF: Finding The Subtype Of SQL Values
|
4364
|
+
** METHOD: sqlite3_value
|
4365
|
+
**
|
4366
|
+
** The sqlite3_value_subtype(V) function returns the subtype for
|
4367
|
+
** an [application-defined SQL function] argument V. The subtype
|
4368
|
+
** information can be used to pass a limited amount of context from
|
4369
|
+
** one SQL function to another. Use the [sqlite3_result_subtype()]
|
4370
|
+
** routine to set the subtype for the return value of an SQL function.
|
4371
|
+
**
|
4372
|
+
** SQLite makes no use of subtype itself. It merely passes the subtype
|
4373
|
+
** from the result of one [application-defined SQL function] into the
|
4374
|
+
** input of another.
|
4375
|
+
*/
|
4376
|
+
SQLITE_API unsigned int SQLITE_STDCALL sqlite3_value_subtype(sqlite3_value*);
|
4377
|
+
|
4358
4378
|
/*
|
4359
4379
|
** CAPI3REF: Copy And Free SQL Values
|
4360
4380
|
** METHOD: sqlite3_value
|
@@ -4654,6 +4674,21 @@ SQLITE_API void SQLITE_STDCALL sqlite3_result_value(sqlite3_context*, sqlite3_va
|
|
4654
4674
|
SQLITE_API void SQLITE_STDCALL sqlite3_result_zeroblob(sqlite3_context*, int n);
|
4655
4675
|
SQLITE_API int SQLITE_STDCALL sqlite3_result_zeroblob64(sqlite3_context*, sqlite3_uint64 n);
|
4656
4676
|
|
4677
|
+
|
4678
|
+
/*
|
4679
|
+
** CAPI3REF: Setting The Subtype Of An SQL Function
|
4680
|
+
** METHOD: sqlite3_context
|
4681
|
+
**
|
4682
|
+
** The sqlite3_result_subtype(C,T) function causes the subtype of
|
4683
|
+
** the result from the [application-defined SQL function] with
|
4684
|
+
** [sqlite3_context] C to be the value T. Only the lower 8 bits
|
4685
|
+
** of the subtype T are preserved in current versions of SQLite;
|
4686
|
+
** higher order bits are discarded.
|
4687
|
+
** The number of subtype bytes preserved by SQLite might increase
|
4688
|
+
** in future releases of SQLite.
|
4689
|
+
*/
|
4690
|
+
SQLITE_API void SQLITE_STDCALL sqlite3_result_subtype(sqlite3_context*,unsigned int);
|
4691
|
+
|
4657
4692
|
/*
|
4658
4693
|
** CAPI3REF: Define New Collating Sequences
|
4659
4694
|
** METHOD: sqlite3
|
@@ -5599,13 +5634,31 @@ struct sqlite3_module {
|
|
5599
5634
|
** ^The estimatedRows value is an estimate of the number of rows that
|
5600
5635
|
** will be returned by the strategy.
|
5601
5636
|
**
|
5637
|
+
** The xBestIndex method may optionally populate the idxFlags field with a
|
5638
|
+
** mask of SQLITE_INDEX_SCAN_* flags. Currently there is only one such flag -
|
5639
|
+
** SQLITE_INDEX_SCAN_UNIQUE. If the xBestIndex method sets this flag, SQLite
|
5640
|
+
** assumes that the strategy may visit at most one row.
|
5641
|
+
**
|
5642
|
+
** Additionally, if xBestIndex sets the SQLITE_INDEX_SCAN_UNIQUE flag, then
|
5643
|
+
** SQLite also assumes that if a call to the xUpdate() method is made as
|
5644
|
+
** part of the same statement to delete or update a virtual table row and the
|
5645
|
+
** implementation returns SQLITE_CONSTRAINT, then there is no need to rollback
|
5646
|
+
** any database changes. In other words, if the xUpdate() returns
|
5647
|
+
** SQLITE_CONSTRAINT, the database contents must be exactly as they were
|
5648
|
+
** before xUpdate was called. By contrast, if SQLITE_INDEX_SCAN_UNIQUE is not
|
5649
|
+
** set and xUpdate returns SQLITE_CONSTRAINT, any database changes made by
|
5650
|
+
** the xUpdate method are automatically rolled back by SQLite.
|
5651
|
+
**
|
5602
5652
|
** IMPORTANT: The estimatedRows field was added to the sqlite3_index_info
|
5603
5653
|
** structure for SQLite version 3.8.2. If a virtual table extension is
|
5604
5654
|
** used with an SQLite version earlier than 3.8.2, the results of attempting
|
5605
5655
|
** to read or write the estimatedRows field are undefined (but are likely
|
5606
5656
|
** to included crashing the application). The estimatedRows field should
|
5607
5657
|
** therefore only be used if [sqlite3_libversion_number()] returns a
|
5608
|
-
** value greater than or equal to 3008002.
|
5658
|
+
** value greater than or equal to 3008002. Similarly, the idxFlags field
|
5659
|
+
** was added for version 3.9.0. It may therefore only be used if
|
5660
|
+
** sqlite3_libversion_number() returns a value greater than or equal to
|
5661
|
+
** 3009000.
|
5609
5662
|
*/
|
5610
5663
|
struct sqlite3_index_info {
|
5611
5664
|
/* Inputs */
|
@@ -5633,8 +5686,15 @@ struct sqlite3_index_info {
|
|
5633
5686
|
double estimatedCost; /* Estimated cost of using this index */
|
5634
5687
|
/* Fields below are only available in SQLite 3.8.2 and later */
|
5635
5688
|
sqlite3_int64 estimatedRows; /* Estimated number of rows returned */
|
5689
|
+
/* Fields below are only available in SQLite 3.9.0 and later */
|
5690
|
+
int idxFlags; /* Mask of SQLITE_INDEX_SCAN_* flags */
|
5636
5691
|
};
|
5637
5692
|
|
5693
|
+
/*
|
5694
|
+
** CAPI3REF: Virtual Table Scan Flags
|
5695
|
+
*/
|
5696
|
+
#define SQLITE_INDEX_SCAN_UNIQUE 1 /* Scan visits at most 1 row */
|
5697
|
+
|
5638
5698
|
/*
|
5639
5699
|
** CAPI3REF: Virtual Table Constraint Operator Codes
|
5640
5700
|
**
|
@@ -6092,6 +6152,9 @@ SQLITE_API int SQLITE_STDCALL sqlite3_vfs_unregister(sqlite3_vfs*);
|
|
6092
6152
|
** <li> SQLITE_MUTEX_STATIC_APP1
|
6093
6153
|
** <li> SQLITE_MUTEX_STATIC_APP2
|
6094
6154
|
** <li> SQLITE_MUTEX_STATIC_APP3
|
6155
|
+
** <li> SQLITE_MUTEX_STATIC_VFS1
|
6156
|
+
** <li> SQLITE_MUTEX_STATIC_VFS2
|
6157
|
+
** <li> SQLITE_MUTEX_STATIC_VFS3
|
6095
6158
|
** </ul>
|
6096
6159
|
**
|
6097
6160
|
** ^The first two constants (SQLITE_MUTEX_FAST and SQLITE_MUTEX_RECURSIVE)
|
@@ -7858,3 +7921,523 @@ struct sqlite3_rtree_query_info {
|
|
7858
7921
|
|
7859
7922
|
#endif /* ifndef _SQLITE3RTREE_H_ */
|
7860
7923
|
|
7924
|
+
/*
|
7925
|
+
** 2014 May 31
|
7926
|
+
**
|
7927
|
+
** The author disclaims copyright to this source code. In place of
|
7928
|
+
** a legal notice, here is a blessing:
|
7929
|
+
**
|
7930
|
+
** May you do good and not evil.
|
7931
|
+
** May you find forgiveness for yourself and forgive others.
|
7932
|
+
** May you share freely, never taking more than you give.
|
7933
|
+
**
|
7934
|
+
******************************************************************************
|
7935
|
+
**
|
7936
|
+
** Interfaces to extend FTS5. Using the interfaces defined in this file,
|
7937
|
+
** FTS5 may be extended with:
|
7938
|
+
**
|
7939
|
+
** * custom tokenizers, and
|
7940
|
+
** * custom auxiliary functions.
|
7941
|
+
*/
|
7942
|
+
|
7943
|
+
|
7944
|
+
#ifndef _FTS5_H
|
7945
|
+
#define _FTS5_H
|
7946
|
+
|
7947
|
+
|
7948
|
+
#ifdef __cplusplus
|
7949
|
+
extern "C" {
|
7950
|
+
#endif
|
7951
|
+
|
7952
|
+
/*************************************************************************
|
7953
|
+
** CUSTOM AUXILIARY FUNCTIONS
|
7954
|
+
**
|
7955
|
+
** Virtual table implementations may overload SQL functions by implementing
|
7956
|
+
** the sqlite3_module.xFindFunction() method.
|
7957
|
+
*/
|
7958
|
+
|
7959
|
+
typedef struct Fts5ExtensionApi Fts5ExtensionApi;
|
7960
|
+
typedef struct Fts5Context Fts5Context;
|
7961
|
+
typedef struct Fts5PhraseIter Fts5PhraseIter;
|
7962
|
+
|
7963
|
+
typedef void (*fts5_extension_function)(
|
7964
|
+
const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
|
7965
|
+
Fts5Context *pFts, /* First arg to pass to pApi functions */
|
7966
|
+
sqlite3_context *pCtx, /* Context for returning result/error */
|
7967
|
+
int nVal, /* Number of values in apVal[] array */
|
7968
|
+
sqlite3_value **apVal /* Array of trailing arguments */
|
7969
|
+
);
|
7970
|
+
|
7971
|
+
struct Fts5PhraseIter {
|
7972
|
+
const unsigned char *a;
|
7973
|
+
const unsigned char *b;
|
7974
|
+
};
|
7975
|
+
|
7976
|
+
/*
|
7977
|
+
** EXTENSION API FUNCTIONS
|
7978
|
+
**
|
7979
|
+
** xUserData(pFts):
|
7980
|
+
** Return a copy of the context pointer the extension function was
|
7981
|
+
** registered with.
|
7982
|
+
**
|
7983
|
+
** xColumnTotalSize(pFts, iCol, pnToken):
|
7984
|
+
** If parameter iCol is less than zero, set output variable *pnToken
|
7985
|
+
** to the total number of tokens in the FTS5 table. Or, if iCol is
|
7986
|
+
** non-negative but less than the number of columns in the table, return
|
7987
|
+
** the total number of tokens in column iCol, considering all rows in
|
7988
|
+
** the FTS5 table.
|
7989
|
+
**
|
7990
|
+
** If parameter iCol is greater than or equal to the number of columns
|
7991
|
+
** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
|
7992
|
+
** an OOM condition or IO error), an appropriate SQLite error code is
|
7993
|
+
** returned.
|
7994
|
+
**
|
7995
|
+
** xColumnCount(pFts):
|
7996
|
+
** Return the number of columns in the table.
|
7997
|
+
**
|
7998
|
+
** xColumnSize(pFts, iCol, pnToken):
|
7999
|
+
** If parameter iCol is less than zero, set output variable *pnToken
|
8000
|
+
** to the total number of tokens in the current row. Or, if iCol is
|
8001
|
+
** non-negative but less than the number of columns in the table, set
|
8002
|
+
** *pnToken to the number of tokens in column iCol of the current row.
|
8003
|
+
**
|
8004
|
+
** If parameter iCol is greater than or equal to the number of columns
|
8005
|
+
** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
|
8006
|
+
** an OOM condition or IO error), an appropriate SQLite error code is
|
8007
|
+
** returned.
|
8008
|
+
**
|
8009
|
+
** xColumnText:
|
8010
|
+
** This function attempts to retrieve the text of column iCol of the
|
8011
|
+
** current document. If successful, (*pz) is set to point to a buffer
|
8012
|
+
** containing the text in utf-8 encoding, (*pn) is set to the size in bytes
|
8013
|
+
** (not characters) of the buffer and SQLITE_OK is returned. Otherwise,
|
8014
|
+
** if an error occurs, an SQLite error code is returned and the final values
|
8015
|
+
** of (*pz) and (*pn) are undefined.
|
8016
|
+
**
|
8017
|
+
** xPhraseCount:
|
8018
|
+
** Returns the number of phrases in the current query expression.
|
8019
|
+
**
|
8020
|
+
** xPhraseSize:
|
8021
|
+
** Returns the number of tokens in phrase iPhrase of the query. Phrases
|
8022
|
+
** are numbered starting from zero.
|
8023
|
+
**
|
8024
|
+
** xInstCount:
|
8025
|
+
** Set *pnInst to the total number of occurrences of all phrases within
|
8026
|
+
** the query within the current row. Return SQLITE_OK if successful, or
|
8027
|
+
** an error code (e.g. SQLITE_NOMEM) if an error occurs.
|
8028
|
+
**
|
8029
|
+
** xInst:
|
8030
|
+
** Query for the details of phrase match iIdx within the current row.
|
8031
|
+
** Phrase matches are numbered starting from zero, so the iIdx argument
|
8032
|
+
** should be greater than or equal to zero and smaller than the value
|
8033
|
+
** output by xInstCount().
|
8034
|
+
**
|
8035
|
+
** Returns SQLITE_OK if successful, or an error code (e.g. SQLITE_NOMEM)
|
8036
|
+
** if an error occurs.
|
8037
|
+
**
|
8038
|
+
** xRowid:
|
8039
|
+
** Returns the rowid of the current row.
|
8040
|
+
**
|
8041
|
+
** xTokenize:
|
8042
|
+
** Tokenize text using the tokenizer belonging to the FTS5 table.
|
8043
|
+
**
|
8044
|
+
** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
|
8045
|
+
** This API function is used to query the FTS table for phrase iPhrase
|
8046
|
+
** of the current query. Specifically, a query equivalent to:
|
8047
|
+
**
|
8048
|
+
** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid
|
8049
|
+
**
|
8050
|
+
** with $p set to a phrase equivalent to the phrase iPhrase of the
|
8051
|
+
** current query is executed. For each row visited, the callback function
|
8052
|
+
** passed as the fourth argument is invoked. The context and API objects
|
8053
|
+
** passed to the callback function may be used to access the properties of
|
8054
|
+
** each matched row. Invoking Api.xUserData() returns a copy of the pointer
|
8055
|
+
** passed as the third argument (pUserData).
|
8056
|
+
**
|
8057
|
+
** If the callback function returns any value other than SQLITE_OK, the
|
8058
|
+
** query is abandoned and the xQueryPhrase function returns immediately.
|
8059
|
+
** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK.
|
8060
|
+
** Otherwise, the error code is propagated upwards.
|
8061
|
+
**
|
8062
|
+
** If the query runs to completion without incident, SQLITE_OK is returned.
|
8063
|
+
** Or, if some error occurs before the query completes or is aborted by
|
8064
|
+
** the callback, an SQLite error code is returned.
|
8065
|
+
**
|
8066
|
+
**
|
8067
|
+
** xSetAuxdata(pFts5, pAux, xDelete)
|
8068
|
+
**
|
8069
|
+
** Save the pointer passed as the second argument as the extension function's
|
8070
|
+
** "auxiliary data". The pointer may then be retrieved by the current or any
|
8071
|
+
** future invocation of the same fts5 extension function made as part of
|
8072
|
+
** the same MATCH query using the xGetAuxdata() API.
|
8073
|
+
**
|
8074
|
+
** Each extension function is allocated a single auxiliary data slot for
|
8075
|
+
** each FTS query (MATCH expression). If the extension function is invoked
|
8076
|
+
** more than once for a single FTS query, then all invocations share a
|
8077
|
+
** single auxiliary data context.
|
8078
|
+
**
|
8079
|
+
** If there is already an auxiliary data pointer when this function is
|
8080
|
+
** invoked, then it is replaced by the new pointer. If an xDelete callback
|
8081
|
+
** was specified along with the original pointer, it is invoked at this
|
8082
|
+
** point.
|
8083
|
+
**
|
8084
|
+
** The xDelete callback, if one is specified, is also invoked on the
|
8085
|
+
** auxiliary data pointer after the FTS5 query has finished.
|
8086
|
+
**
|
8087
|
+
** If an error (e.g. an OOM condition) occurs within this function,
|
8088
|
+
** the auxiliary data is set to NULL and an error code returned. If the
|
8089
|
+
** xDelete parameter was not NULL, it is invoked on the auxiliary data
|
8090
|
+
** pointer before returning.
|
8091
|
+
**
|
8092
|
+
**
|
8093
|
+
** xGetAuxdata(pFts5, bClear)
|
8094
|
+
**
|
8095
|
+
** Returns the current auxiliary data pointer for the fts5 extension
|
8096
|
+
** function. See the xSetAuxdata() method for details.
|
8097
|
+
**
|
8098
|
+
** If the bClear argument is non-zero, then the auxiliary data is cleared
|
8099
|
+
** (set to NULL) before this function returns. In this case the xDelete,
|
8100
|
+
** if any, is not invoked.
|
8101
|
+
**
|
8102
|
+
**
|
8103
|
+
** xRowCount(pFts5, pnRow)
|
8104
|
+
**
|
8105
|
+
** This function is used to retrieve the total number of rows in the table.
|
8106
|
+
** In other words, the same value that would be returned by:
|
8107
|
+
**
|
8108
|
+
** SELECT count(*) FROM ftstable;
|
8109
|
+
**
|
8110
|
+
** xPhraseFirst()
|
8111
|
+
** This function is used, along with type Fts5PhraseIter and the xPhraseNext
|
8112
|
+
** method, to iterate through all instances of a single query phrase within
|
8113
|
+
** the current row. This is the same information as is accessible via the
|
8114
|
+
** xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient
|
8115
|
+
** to use, this API may be faster under some circumstances. To iterate
|
8116
|
+
** through instances of phrase iPhrase, use the following code:
|
8117
|
+
**
|
8118
|
+
** Fts5PhraseIter iter;
|
8119
|
+
** int iCol, iOff;
|
8120
|
+
** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
|
8121
|
+
** iOff>=0;
|
8122
|
+
** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
|
8123
|
+
** ){
|
8124
|
+
** // An instance of phrase iPhrase at offset iOff of column iCol
|
8125
|
+
** }
|
8126
|
+
**
|
8127
|
+
** The Fts5PhraseIter structure is defined above. Applications should not
|
8128
|
+
** modify this structure directly - it should only be used as shown above
|
8129
|
+
** with the xPhraseFirst() and xPhraseNext() API methods.
|
8130
|
+
**
|
8131
|
+
** xPhraseNext()
|
8132
|
+
** See xPhraseFirst above.
|
8133
|
+
*/
|
8134
|
+
struct Fts5ExtensionApi {
|
8135
|
+
int iVersion; /* Currently always set to 1 */
|
8136
|
+
|
8137
|
+
void *(*xUserData)(Fts5Context*);
|
8138
|
+
|
8139
|
+
int (*xColumnCount)(Fts5Context*);
|
8140
|
+
int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
|
8141
|
+
int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);
|
8142
|
+
|
8143
|
+
int (*xTokenize)(Fts5Context*,
|
8144
|
+
const char *pText, int nText, /* Text to tokenize */
|
8145
|
+
void *pCtx, /* Context passed to xToken() */
|
8146
|
+
int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
|
8147
|
+
);
|
8148
|
+
|
8149
|
+
int (*xPhraseCount)(Fts5Context*);
|
8150
|
+
int (*xPhraseSize)(Fts5Context*, int iPhrase);
|
8151
|
+
|
8152
|
+
int (*xInstCount)(Fts5Context*, int *pnInst);
|
8153
|
+
int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);
|
8154
|
+
|
8155
|
+
sqlite3_int64 (*xRowid)(Fts5Context*);
|
8156
|
+
int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);
|
8157
|
+
int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);
|
8158
|
+
|
8159
|
+
int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData,
|
8160
|
+
int(*)(const Fts5ExtensionApi*,Fts5Context*,void*)
|
8161
|
+
);
|
8162
|
+
int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*));
|
8163
|
+
void *(*xGetAuxdata)(Fts5Context*, int bClear);
|
8164
|
+
|
8165
|
+
void (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*);
|
8166
|
+
void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff);
|
8167
|
+
};
|
8168
|
+
|
8169
|
+
/*
|
8170
|
+
** CUSTOM AUXILIARY FUNCTIONS
|
8171
|
+
*************************************************************************/
|
8172
|
+
|
8173
|
+
/*************************************************************************
|
8174
|
+
** CUSTOM TOKENIZERS
|
8175
|
+
**
|
8176
|
+
** Applications may also register custom tokenizer types. A tokenizer
|
8177
|
+
** is registered by providing fts5 with a populated instance of the
|
8178
|
+
** following structure. All structure methods must be defined, setting
|
8179
|
+
** any member of the fts5_tokenizer struct to NULL leads to undefined
|
8180
|
+
** behaviour. The structure methods are expected to function as follows:
|
8181
|
+
**
|
8182
|
+
** xCreate:
|
8183
|
+
** This function is used to allocate and initialize a tokenizer instance.
|
8184
|
+
** A tokenizer instance is required to actually tokenize text.
|
8185
|
+
**
|
8186
|
+
** The first argument passed to this function is a copy of the (void*)
|
8187
|
+
** pointer provided by the application when the fts5_tokenizer object
|
8188
|
+
** was registered with FTS5 (the third argument to xCreateTokenizer()).
|
8189
|
+
** The second and third arguments are an array of nul-terminated strings
|
8190
|
+
** containing the tokenizer arguments, if any, specified following the
|
8191
|
+
** tokenizer name as part of the CREATE VIRTUAL TABLE statement used
|
8192
|
+
** to create the FTS5 table.
|
8193
|
+
**
|
8194
|
+
** The final argument is an output variable. If successful, (*ppOut)
|
8195
|
+
** should be set to point to the new tokenizer handle and SQLITE_OK
|
8196
|
+
** returned. If an error occurs, some value other than SQLITE_OK should
|
8197
|
+
** be returned. In this case, fts5 assumes that the final value of *ppOut
|
8198
|
+
** is undefined.
|
8199
|
+
**
|
8200
|
+
** xDelete:
|
8201
|
+
** This function is invoked to delete a tokenizer handle previously
|
8202
|
+
** allocated using xCreate(). Fts5 guarantees that this function will
|
8203
|
+
** be invoked exactly once for each successful call to xCreate().
|
8204
|
+
**
|
8205
|
+
** xTokenize:
|
8206
|
+
** This function is expected to tokenize the nText byte string indicated
|
8207
|
+
** by argument pText. pText may or may not be nul-terminated. The first
|
8208
|
+
** argument passed to this function is a pointer to an Fts5Tokenizer object
|
8209
|
+
** returned by an earlier call to xCreate().
|
8210
|
+
**
|
8211
|
+
** The third argument indicates the reason that FTS5 is requesting
|
8212
|
+
** tokenization of the supplied text. This is always one of the following
|
8213
|
+
** four values:
|
8214
|
+
**
|
8215
|
+
** <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into
|
8216
|
+
** or removed from the FTS table. The tokenizer is being invoked to
|
8217
|
+
** determine the set of tokens to add to (or delete from) the
|
8218
|
+
** FTS index.
|
8219
|
+
**
|
8220
|
+
** <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed
|
8221
|
+
** against the FTS index. The tokenizer is being called to tokenize
|
8222
|
+
** a bareword or quoted string specified as part of the query.
|
8223
|
+
**
|
8224
|
+
** <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as
|
8225
|
+
** FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is
|
8226
|
+
** followed by a "*" character, indicating that the last token
|
8227
|
+
** returned by the tokenizer will be treated as a token prefix.
|
8228
|
+
**
|
8229
|
+
** <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to
|
8230
|
+
** satisfy an fts5_api.xTokenize() request made by an auxiliary
|
8231
|
+
** function. Or an fts5_api.xColumnSize() request made by the same
|
8232
|
+
** on a columnsize=0 database.
|
8233
|
+
** </ul>
|
8234
|
+
**
|
8235
|
+
** For each token in the input string, the supplied callback xToken() must
|
8236
|
+
** be invoked. The first argument to it should be a copy of the pointer
|
8237
|
+
** passed as the second argument to xTokenize(). The third and fourth
|
8238
|
+
** arguments are a pointer to a buffer containing the token text, and the
|
8239
|
+
** size of the token in bytes. The 5th and 6th arguments are the byte offsets
|
8240
|
+
** of the first byte of and first byte immediately following the text from
|
8241
|
+
** which the token is derived within the input.
|
8242
|
+
**
|
8243
|
+
** The second argument passed to the xToken() callback ("tflags") should
|
8244
|
+
** normally be set to 0. The exception is if the tokenizer supports
|
8245
|
+
** synonyms. In this case see the discussion below for details.
|
8246
|
+
**
|
8247
|
+
** FTS5 assumes the xToken() callback is invoked for each token in the
|
8248
|
+
** order that they occur within the input text.
|
8249
|
+
**
|
8250
|
+
** If an xToken() callback returns any value other than SQLITE_OK, then
|
8251
|
+
** the tokenization should be abandoned and the xTokenize() method should
|
8252
|
+
** immediately return a copy of the xToken() return value. Or, if the
|
8253
|
+
** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
|
8254
|
+
** if an error occurs with the xTokenize() implementation itself, it
|
8255
|
+
** may abandon the tokenization and return any error code other than
|
8256
|
+
** SQLITE_OK or SQLITE_DONE.
|
8257
|
+
**
|
8258
|
+
** SYNONYM SUPPORT
|
8259
|
+
**
|
8260
|
+
** Custom tokenizers may also support synonyms. Consider a case in which a
|
8261
|
+
** user wishes to query for a phrase such as "first place". Using the
|
8262
|
+
** built-in tokenizers, the FTS5 query 'first + place' will match instances
|
8263
|
+
** of "first place" within the document set, but not alternative forms
|
8264
|
+
** such as "1st place". In some applications, it would be better to match
|
8265
|
+
** all instances of "first place" or "1st place" regardless of which form
|
8266
|
+
** the user specified in the MATCH query text.
|
8267
|
+
**
|
8268
|
+
** There are several ways to approach this in FTS5:
|
8269
|
+
**
|
8270
|
+
** <ol><li> By mapping all synonyms to a single token.
|
8271
|
+
** In the above example, this means that the tokenizer returns the
|
8272
|
+
** same token for inputs "first" and "1st". Say that token is in
|
8273
|
+
** fact "first", so that when the user inserts the document "I won
|
8274
|
+
** 1st place" entries are added to the index for tokens "i", "won",
|
8275
|
+
** "first" and "place". If the user then queries for '1st + place',
|
8276
|
+
** the tokenizer substitutes "first" for "1st" and the query works
|
8277
|
+
** as expected.
|
8278
|
+
**
|
8279
|
+
** <li> By querying the index for all synonyms of each query term separately.
|
8280
|
+
** In this case, when tokenizing query text, the tokenizer may
|
8281
|
+
** provide multiple synonyms for a single term within the document.
|
8282
|
+
** FTS5 then queries the index for each synonym individually. For
|
8283
|
+
** example, faced with the query:
|
8284
|
+
**
|
8285
|
+
** <codeblock>
|
8286
|
+
** ... MATCH 'first place'</codeblock>
|
8287
|
+
**
|
8288
|
+
** the tokenizer offers both "1st" and "first" as synonyms for the
|
8289
|
+
** first token in the MATCH query and FTS5 effectively runs a query
|
8290
|
+
** similar to:
|
8291
|
+
**
|
8292
|
+
** <codeblock>
|
8293
|
+
** ... MATCH '(first OR 1st) place'</codeblock>
|
8294
|
+
**
|
8295
|
+
** except that, for the purposes of auxiliary functions, the query
|
8296
|
+
** still appears to contain just two phrases - "(first OR 1st)"
|
8297
|
+
** being treated as a single phrase.
|
8298
|
+
**
|
8299
|
+
** <li> By adding multiple synonyms for a single term to the FTS index.
|
8300
|
+
** Using this method, when tokenizing document text, the tokenizer
|
8301
|
+
** provides multiple synonyms for each token. So that when a
|
8302
|
+
** document such as "I won first place" is tokenized, entries are
|
8303
|
+
** added to the FTS index for "i", "won", "first", "1st" and
|
8304
|
+
** "place".
|
8305
|
+
**
|
8306
|
+
** This way, even if the tokenizer does not provide synonyms
|
8307
|
+
** when tokenizing query text (it should not - to do so would be
|
8308
|
+
** inefficient), it doesn't matter if the user queries for
|
8309
|
+
** 'first + place' or '1st + place', as there are entries in the
|
8310
|
+
** FTS index corresponding to both forms of the first token.
|
8311
|
+
** </ol>
|
8312
|
+
**
|
8313
|
+
** Whether it is parsing document or query text, any call to xToken that
|
8314
|
+
** specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit
|
8315
|
+
** is considered to supply a synonym for the previous token. For example,
|
8316
|
+
** when parsing the document "I won first place", a tokenizer that supports
|
8317
|
+
** synonyms would call xToken() 5 times, as follows:
|
8318
|
+
**
|
8319
|
+
** <codeblock>
|
8320
|
+
** xToken(pCtx, 0, "i", 1, 0, 1);
|
8321
|
+
** xToken(pCtx, 0, "won", 3, 2, 5);
|
8322
|
+
** xToken(pCtx, 0, "first", 5, 6, 11);
|
8323
|
+
** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11);
|
8324
|
+
** xToken(pCtx, 0, "place", 5, 12, 17);
|
8325
|
+
**</codeblock>
|
8326
|
+
**
|
8327
|
+
** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time
|
8328
|
+
** xToken() is called. Multiple synonyms may be specified for a single token
|
8329
|
+
** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence.
|
8330
|
+
** There is no limit to the number of synonyms that may be provided for a
|
8331
|
+
** single token.
|
8332
|
+
**
|
8333
|
+
** In many cases, method (1) above is the best approach. It does not add
|
8334
|
+
** extra data to the FTS index or require FTS5 to query for multiple terms,
|
8335
|
+
** so it is efficient in terms of disk space and query speed. However, it
|
8336
|
+
** does not support prefix queries very well. If, as suggested above, the
|
8337
|
+
** token "first" is substituted for "1st" by the tokenizer, then the query:
|
8338
|
+
**
|
8339
|
+
** <codeblock>
|
8340
|
+
** ... MATCH '1s*'</codeblock>
|
8341
|
+
**
|
8342
|
+
** will not match documents that contain the token "1st" (as the tokenizer
|
8343
|
+
** will probably not map "1s" to any prefix of "first").
|
8344
|
+
**
|
8345
|
+
** For full prefix support, method (3) may be preferred. In this case,
|
8346
|
+
** because the index contains entries for both "first" and "1st", prefix
|
8347
|
+
** queries such as 'fi*' or '1s*' will match correctly. However, because
|
8348
|
+
** extra entries are added to the FTS index, this method uses more space
|
8349
|
+
** within the database.
|
8350
|
+
**
|
8351
|
+
** Method (2) offers a midpoint between (1) and (3). Using this method,
|
8352
|
+
** a query such as '1s*' will match documents that contain the literal
|
8353
|
+
** token "1st", but not "first" (assuming the tokenizer is not able to
|
8354
|
+
** provide synonyms for prefixes). However, a non-prefix query like '1st'
|
8355
|
+
** will match against "1st" and "first". This method does not require
|
8356
|
+
** extra disk space, as no extra entries are added to the FTS index.
|
8357
|
+
** On the other hand, it may require more CPU cycles to run MATCH queries,
|
8358
|
+
** as separate queries of the FTS index are required for each synonym.
|
8359
|
+
**
|
8360
|
+
** When using methods (2) or (3), it is important that the tokenizer only
|
8361
|
+
** provide synonyms when tokenizing document text (method (3)) or query
|
8362
|
+
** text (method (2)), not both. Doing so will not cause any errors, but is
|
8363
|
+
** inefficient.
|
8364
|
+
*/
|
8365
|
+
typedef struct Fts5Tokenizer Fts5Tokenizer;
|
8366
|
+
typedef struct fts5_tokenizer fts5_tokenizer;
|
8367
|
+
struct fts5_tokenizer {
|
8368
|
+
int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
|
8369
|
+
void (*xDelete)(Fts5Tokenizer*);
|
8370
|
+
int (*xTokenize)(Fts5Tokenizer*,
|
8371
|
+
void *pCtx,
|
8372
|
+
int flags, /* Mask of FTS5_TOKENIZE_* flags */
|
8373
|
+
const char *pText, int nText,
|
8374
|
+
int (*xToken)(
|
8375
|
+
void *pCtx, /* Copy of 2nd argument to xTokenize() */
|
8376
|
+
int tflags, /* Mask of FTS5_TOKEN_* flags */
|
8377
|
+
const char *pToken, /* Pointer to buffer containing token */
|
8378
|
+
int nToken, /* Size of token in bytes */
|
8379
|
+
int iStart, /* Byte offset of token within input text */
|
8380
|
+
int iEnd /* Byte offset of end of token within input text */
|
8381
|
+
)
|
8382
|
+
);
|
8383
|
+
};
|
8384
|
+
|
8385
|
+
/* Flags that may be passed as the third argument to xTokenize() */
|
8386
|
+
#define FTS5_TOKENIZE_QUERY 0x0001
|
8387
|
+
#define FTS5_TOKENIZE_PREFIX 0x0002
|
8388
|
+
#define FTS5_TOKENIZE_DOCUMENT 0x0004
|
8389
|
+
#define FTS5_TOKENIZE_AUX 0x0008
|
8390
|
+
|
8391
|
+
/* Flags that may be passed by the tokenizer implementation back to FTS5
|
8392
|
+
** as the second argument to the supplied xToken callback. */
|
8393
|
+
#define FTS5_TOKEN_COLOCATED 0x0001 /* Same position as prev. token */
|
8394
|
+
|
8395
|
+
/*
|
8396
|
+
** END OF CUSTOM TOKENIZERS
|
8397
|
+
*************************************************************************/
|
8398
|
+
|
8399
|
+
/*************************************************************************
|
8400
|
+
** FTS5 EXTENSION REGISTRATION API
|
8401
|
+
*/
|
8402
|
+
typedef struct fts5_api fts5_api;
|
8403
|
+
struct fts5_api {
|
8404
|
+
int iVersion; /* Currently always set to 2 */
|
8405
|
+
|
8406
|
+
/* Create a new tokenizer */
|
8407
|
+
int (*xCreateTokenizer)(
|
8408
|
+
fts5_api *pApi,
|
8409
|
+
const char *zName,
|
8410
|
+
void *pContext,
|
8411
|
+
fts5_tokenizer *pTokenizer,
|
8412
|
+
void (*xDestroy)(void*)
|
8413
|
+
);
|
8414
|
+
|
8415
|
+
/* Find an existing tokenizer */
|
8416
|
+
int (*xFindTokenizer)(
|
8417
|
+
fts5_api *pApi,
|
8418
|
+
const char *zName,
|
8419
|
+
void **ppContext,
|
8420
|
+
fts5_tokenizer *pTokenizer
|
8421
|
+
);
|
8422
|
+
|
8423
|
+
/* Create a new auxiliary function */
|
8424
|
+
int (*xCreateFunction)(
|
8425
|
+
fts5_api *pApi,
|
8426
|
+
const char *zName,
|
8427
|
+
void *pContext,
|
8428
|
+
fts5_extension_function xFunction,
|
8429
|
+
void (*xDestroy)(void*)
|
8430
|
+
);
|
8431
|
+
};
|
8432
|
+
|
8433
|
+
/*
|
8434
|
+
** END OF REGISTRATION API
|
8435
|
+
*************************************************************************/
|
8436
|
+
|
8437
|
+
#ifdef __cplusplus
|
8438
|
+
} /* end of the 'extern "C"' block */
|
8439
|
+
#endif
|
8440
|
+
|
8441
|
+
#endif /* _FTS5_H */
|
8442
|
+
|
8443
|
+
|