amalgalite 1.4.1-x86-mingw32 → 1.5.0-x86-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -111,9 +111,9 @@ extern "C" {
111
111
  ** [sqlite3_libversion_number()], [sqlite3_sourceid()],
112
112
  ** [sqlite_version()] and [sqlite_source_id()].
113
113
  */
114
- #define SQLITE_VERSION "3.8.11.1"
115
- #define SQLITE_VERSION_NUMBER 3008011
116
- #define SQLITE_SOURCE_ID "2015-07-29 20:00:57 cf538e2783e468bbc25e7cb2a9ee64d3e0e80b2f"
114
+ #define SQLITE_VERSION "3.9.2"
115
+ #define SQLITE_VERSION_NUMBER 3009002
116
+ #define SQLITE_SOURCE_ID "2015-11-02 18:31:45 bda77dda9697c463c3d0704014d51627fceee328"
117
117
 
118
118
  /*
119
119
  ** CAPI3REF: Run-Time Library Version Numbers
@@ -124,7 +124,7 @@ extern "C" {
124
124
  ** but are associated with the library instead of the header file. ^(Cautious
125
125
  ** programmers might include assert() statements in their application to
126
126
  ** verify that values returned by these interfaces match the macros in
127
- ** the header, and thus insure that the application is
127
+ ** the header, and thus ensure that the application is
128
128
  ** compiled with matching library and header files.
129
129
  **
130
130
  ** <blockquote><pre>
@@ -374,7 +374,7 @@ typedef int (*sqlite3_callback)(void*,int,char**, char**);
374
374
  ** Restrictions:
375
375
  **
376
376
  ** <ul>
377
- ** <li> The application must insure that the 1st parameter to sqlite3_exec()
377
+ ** <li> The application must ensure that the 1st parameter to sqlite3_exec()
378
378
  ** is a valid and open [database connection].
379
379
  ** <li> The application must not close the [database connection] specified by
380
380
  ** the 1st parameter to sqlite3_exec() while sqlite3_exec() is running.
@@ -477,6 +477,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_exec(
477
477
  #define SQLITE_IOERR_MMAP (SQLITE_IOERR | (24<<8))
478
478
  #define SQLITE_IOERR_GETTEMPPATH (SQLITE_IOERR | (25<<8))
479
479
  #define SQLITE_IOERR_CONVPATH (SQLITE_IOERR | (26<<8))
480
+ #define SQLITE_IOERR_VNODE (SQLITE_IOERR | (27<<8))
480
481
  #define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8))
481
482
  #define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8))
482
483
  #define SQLITE_BUSY_SNAPSHOT (SQLITE_BUSY | (2<<8))
@@ -1366,9 +1367,11 @@ SQLITE_API int SQLITE_STDCALL sqlite3_os_end(void);
1366
1367
  ** applications and so this routine is usually not necessary. It is
1367
1368
  ** provided to support rare applications with unusual needs.
1368
1369
  **
1369
- ** The sqlite3_config() interface is not threadsafe. The application
1370
- ** must insure that no other SQLite interfaces are invoked by other
1371
- ** threads while sqlite3_config() is running. Furthermore, sqlite3_config()
1370
+ ** <b>The sqlite3_config() interface is not threadsafe. The application
1371
+ ** must ensure that no other SQLite interfaces are invoked by other
1372
+ ** threads while sqlite3_config() is running.</b>
1373
+ **
1374
+ ** The sqlite3_config() interface
1372
1375
  ** may only be invoked prior to library initialization using
1373
1376
  ** [sqlite3_initialize()] or after shutdown by [sqlite3_shutdown()].
1374
1377
  ** ^If sqlite3_config() is called after [sqlite3_initialize()] and before
@@ -3373,7 +3376,8 @@ SQLITE_API int SQLITE_STDCALL sqlite3_stmt_readonly(sqlite3_stmt *pStmt);
3373
3376
  **
3374
3377
  ** ^The sqlite3_stmt_busy(S) interface returns true (non-zero) if the
3375
3378
  ** [prepared statement] S has been stepped at least once using
3376
- ** [sqlite3_step(S)] but has not run to completion and/or has not
3379
+ ** [sqlite3_step(S)] but has neither run to completion (returned
3380
+ ** [SQLITE_DONE] from [sqlite3_step(S)]) nor
3377
3381
  ** been reset using [sqlite3_reset(S)]. ^The sqlite3_stmt_busy(S)
3378
3382
  ** interface returns false if S is a NULL pointer. If S is not a
3379
3383
  ** NULL pointer and is not a pointer to a valid [prepared statement]
@@ -3626,7 +3630,7 @@ SQLITE_API const char *SQLITE_STDCALL sqlite3_bind_parameter_name(sqlite3_stmt*,
3626
3630
  **
3627
3631
  ** See also: [sqlite3_bind_blob|sqlite3_bind()],
3628
3632
  ** [sqlite3_bind_parameter_count()], and
3629
- ** [sqlite3_bind_parameter_index()].
3633
+ ** [sqlite3_bind_parameter_name()].
3630
3634
  */
3631
3635
  SQLITE_API int SQLITE_STDCALL sqlite3_bind_parameter_index(sqlite3_stmt*, const char *zName);
3632
3636
 
@@ -4355,6 +4359,22 @@ SQLITE_API const void *SQLITE_STDCALL sqlite3_value_text16be(sqlite3_value*);
4355
4359
  SQLITE_API int SQLITE_STDCALL sqlite3_value_type(sqlite3_value*);
4356
4360
  SQLITE_API int SQLITE_STDCALL sqlite3_value_numeric_type(sqlite3_value*);
4357
4361
 
4362
+ /*
4363
+ ** CAPI3REF: Finding The Subtype Of SQL Values
4364
+ ** METHOD: sqlite3_value
4365
+ **
4366
+ ** The sqlite3_value_subtype(V) function returns the subtype for
4367
+ ** an [application-defined SQL function] argument V. The subtype
4368
+ ** information can be used to pass a limited amount of context from
4369
+ ** one SQL function to another. Use the [sqlite3_result_subtype()]
4370
+ ** routine to set the subtype for the return value of an SQL function.
4371
+ **
4372
+ ** SQLite makes no use of subtype itself. It merely passes the subtype
4373
+ ** from the result of one [application-defined SQL function] into the
4374
+ ** input of another.
4375
+ */
4376
+ SQLITE_API unsigned int SQLITE_STDCALL sqlite3_value_subtype(sqlite3_value*);
4377
+
4358
4378
  /*
4359
4379
  ** CAPI3REF: Copy And Free SQL Values
4360
4380
  ** METHOD: sqlite3_value
@@ -4654,6 +4674,21 @@ SQLITE_API void SQLITE_STDCALL sqlite3_result_value(sqlite3_context*, sqlite3_va
4654
4674
  SQLITE_API void SQLITE_STDCALL sqlite3_result_zeroblob(sqlite3_context*, int n);
4655
4675
  SQLITE_API int SQLITE_STDCALL sqlite3_result_zeroblob64(sqlite3_context*, sqlite3_uint64 n);
4656
4676
 
4677
+
4678
+ /*
4679
+ ** CAPI3REF: Setting The Subtype Of An SQL Function
4680
+ ** METHOD: sqlite3_context
4681
+ **
4682
+ ** The sqlite3_result_subtype(C,T) function causes the subtype of
4683
+ ** the result from the [application-defined SQL function] with
4684
+ ** [sqlite3_context] C to be the value T. Only the lower 8 bits
4685
+ ** of the subtype T are preserved in current versions of SQLite;
4686
+ ** higher order bits are discarded.
4687
+ ** The number of subtype bytes preserved by SQLite might increase
4688
+ ** in future releases of SQLite.
4689
+ */
4690
+ SQLITE_API void SQLITE_STDCALL sqlite3_result_subtype(sqlite3_context*,unsigned int);
4691
+
4657
4692
  /*
4658
4693
  ** CAPI3REF: Define New Collating Sequences
4659
4694
  ** METHOD: sqlite3
@@ -5599,13 +5634,31 @@ struct sqlite3_module {
5599
5634
  ** ^The estimatedRows value is an estimate of the number of rows that
5600
5635
  ** will be returned by the strategy.
5601
5636
  **
5637
+ ** The xBestIndex method may optionally populate the idxFlags field with a
5638
+ ** mask of SQLITE_INDEX_SCAN_* flags. Currently there is only one such flag -
5639
+ ** SQLITE_INDEX_SCAN_UNIQUE. If the xBestIndex method sets this flag, SQLite
5640
+ ** assumes that the strategy may visit at most one row.
5641
+ **
5642
+ ** Additionally, if xBestIndex sets the SQLITE_INDEX_SCAN_UNIQUE flag, then
5643
+ ** SQLite also assumes that if a call to the xUpdate() method is made as
5644
+ ** part of the same statement to delete or update a virtual table row and the
5645
+ ** implementation returns SQLITE_CONSTRAINT, then there is no need to rollback
5646
+ ** any database changes. In other words, if the xUpdate() returns
5647
+ ** SQLITE_CONSTRAINT, the database contents must be exactly as they were
5648
+ ** before xUpdate was called. By contrast, if SQLITE_INDEX_SCAN_UNIQUE is not
5649
+ ** set and xUpdate returns SQLITE_CONSTRAINT, any database changes made by
5650
+ ** the xUpdate method are automatically rolled back by SQLite.
5651
+ **
5602
5652
  ** IMPORTANT: The estimatedRows field was added to the sqlite3_index_info
5603
5653
  ** structure for SQLite version 3.8.2. If a virtual table extension is
5604
5654
  ** used with an SQLite version earlier than 3.8.2, the results of attempting
5605
5655
  ** to read or write the estimatedRows field are undefined (but are likely
5606
5656
  ** to included crashing the application). The estimatedRows field should
5607
5657
  ** therefore only be used if [sqlite3_libversion_number()] returns a
5608
- ** value greater than or equal to 3008002.
5658
+ ** value greater than or equal to 3008002. Similarly, the idxFlags field
5659
+ ** was added for version 3.9.0. It may therefore only be used if
5660
+ ** sqlite3_libversion_number() returns a value greater than or equal to
5661
+ ** 3009000.
5609
5662
  */
5610
5663
  struct sqlite3_index_info {
5611
5664
  /* Inputs */
@@ -5633,8 +5686,15 @@ struct sqlite3_index_info {
5633
5686
  double estimatedCost; /* Estimated cost of using this index */
5634
5687
  /* Fields below are only available in SQLite 3.8.2 and later */
5635
5688
  sqlite3_int64 estimatedRows; /* Estimated number of rows returned */
5689
+ /* Fields below are only available in SQLite 3.9.0 and later */
5690
+ int idxFlags; /* Mask of SQLITE_INDEX_SCAN_* flags */
5636
5691
  };
5637
5692
 
5693
+ /*
5694
+ ** CAPI3REF: Virtual Table Scan Flags
5695
+ */
5696
+ #define SQLITE_INDEX_SCAN_UNIQUE 1 /* Scan visits at most 1 row */
5697
+
5638
5698
  /*
5639
5699
  ** CAPI3REF: Virtual Table Constraint Operator Codes
5640
5700
  **
@@ -6092,6 +6152,9 @@ SQLITE_API int SQLITE_STDCALL sqlite3_vfs_unregister(sqlite3_vfs*);
6092
6152
  ** <li> SQLITE_MUTEX_STATIC_APP1
6093
6153
  ** <li> SQLITE_MUTEX_STATIC_APP2
6094
6154
  ** <li> SQLITE_MUTEX_STATIC_APP3
6155
+ ** <li> SQLITE_MUTEX_STATIC_VFS1
6156
+ ** <li> SQLITE_MUTEX_STATIC_VFS2
6157
+ ** <li> SQLITE_MUTEX_STATIC_VFS3
6095
6158
  ** </ul>
6096
6159
  **
6097
6160
  ** ^The first two constants (SQLITE_MUTEX_FAST and SQLITE_MUTEX_RECURSIVE)
@@ -7858,3 +7921,523 @@ struct sqlite3_rtree_query_info {
7858
7921
 
7859
7922
  #endif /* ifndef _SQLITE3RTREE_H_ */
7860
7923
 
7924
+ /*
7925
+ ** 2014 May 31
7926
+ **
7927
+ ** The author disclaims copyright to this source code. In place of
7928
+ ** a legal notice, here is a blessing:
7929
+ **
7930
+ ** May you do good and not evil.
7931
+ ** May you find forgiveness for yourself and forgive others.
7932
+ ** May you share freely, never taking more than you give.
7933
+ **
7934
+ ******************************************************************************
7935
+ **
7936
+ ** Interfaces to extend FTS5. Using the interfaces defined in this file,
7937
+ ** FTS5 may be extended with:
7938
+ **
7939
+ ** * custom tokenizers, and
7940
+ ** * custom auxiliary functions.
7941
+ */
7942
+
7943
+
7944
+ #ifndef _FTS5_H
7945
+ #define _FTS5_H
7946
+
7947
+
7948
+ #ifdef __cplusplus
7949
+ extern "C" {
7950
+ #endif
7951
+
7952
+ /*************************************************************************
7953
+ ** CUSTOM AUXILIARY FUNCTIONS
7954
+ **
7955
+ ** Virtual table implementations may overload SQL functions by implementing
7956
+ ** the sqlite3_module.xFindFunction() method.
7957
+ */
7958
+
7959
+ typedef struct Fts5ExtensionApi Fts5ExtensionApi;
7960
+ typedef struct Fts5Context Fts5Context;
7961
+ typedef struct Fts5PhraseIter Fts5PhraseIter;
7962
+
7963
+ typedef void (*fts5_extension_function)(
7964
+ const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
7965
+ Fts5Context *pFts, /* First arg to pass to pApi functions */
7966
+ sqlite3_context *pCtx, /* Context for returning result/error */
7967
+ int nVal, /* Number of values in apVal[] array */
7968
+ sqlite3_value **apVal /* Array of trailing arguments */
7969
+ );
7970
+
7971
+ struct Fts5PhraseIter {
7972
+ const unsigned char *a;
7973
+ const unsigned char *b;
7974
+ };
7975
+
7976
+ /*
7977
+ ** EXTENSION API FUNCTIONS
7978
+ **
7979
+ ** xUserData(pFts):
7980
+ ** Return a copy of the context pointer the extension function was
7981
+ ** registered with.
7982
+ **
7983
+ ** xColumnTotalSize(pFts, iCol, pnToken):
7984
+ ** If parameter iCol is less than zero, set output variable *pnToken
7985
+ ** to the total number of tokens in the FTS5 table. Or, if iCol is
7986
+ ** non-negative but less than the number of columns in the table, set *pnToken to
7987
+ ** the total number of tokens in column iCol, considering all rows in
7988
+ ** the FTS5 table.
7989
+ **
7990
+ ** If parameter iCol is greater than or equal to the number of columns
7991
+ ** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
7992
+ ** an OOM condition or IO error), an appropriate SQLite error code is
7993
+ ** returned.
7994
+ **
7995
+ ** xColumnCount(pFts):
7996
+ ** Return the number of columns in the table.
7997
+ **
7998
+ ** xColumnSize(pFts, iCol, pnToken):
7999
+ ** If parameter iCol is less than zero, set output variable *pnToken
8000
+ ** to the total number of tokens in the current row. Or, if iCol is
8001
+ ** non-negative but less than the number of columns in the table, set
8002
+ ** *pnToken to the number of tokens in column iCol of the current row.
8003
+ **
8004
+ ** If parameter iCol is greater than or equal to the number of columns
8005
+ ** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
8006
+ ** an OOM condition or IO error), an appropriate SQLite error code is
8007
+ ** returned.
8008
+ **
8009
+ ** xColumnText:
8010
+ ** This function attempts to retrieve the text of column iCol of the
8011
+ ** current document. If successful, (*pz) is set to point to a buffer
8012
+ ** containing the text in utf-8 encoding, (*pn) is set to the size in bytes
8013
+ ** (not characters) of the buffer and SQLITE_OK is returned. Otherwise,
8014
+ ** if an error occurs, an SQLite error code is returned and the final values
8015
+ ** of (*pz) and (*pn) are undefined.
8016
+ **
8017
+ ** xPhraseCount:
8018
+ ** Returns the number of phrases in the current query expression.
8019
+ **
8020
+ ** xPhraseSize:
8021
+ ** Returns the number of tokens in phrase iPhrase of the query. Phrases
8022
+ ** are numbered starting from zero.
8023
+ **
8024
+ ** xInstCount:
8025
+ ** Set *pnInst to the total number of occurrences of all phrases within
8026
+ ** the query within the current row. Return SQLITE_OK if successful, or
8027
+ ** an error code (e.g. SQLITE_NOMEM) if an error occurs.
8028
+ **
8029
+ ** xInst:
8030
+ ** Query for the details of phrase match iIdx within the current row.
8031
+ ** Phrase matches are numbered starting from zero, so the iIdx argument
8032
+ ** should be greater than or equal to zero and smaller than the value
8033
+ ** output by xInstCount().
8034
+ **
8035
+ ** Returns SQLITE_OK if successful, or an error code (e.g. SQLITE_NOMEM)
8036
+ ** if an error occurs.
8037
+ **
8038
+ ** xRowid:
8039
+ ** Returns the rowid of the current row.
8040
+ **
8041
+ ** xTokenize:
8042
+ ** Tokenize text using the tokenizer belonging to the FTS5 table.
8043
+ **
8044
+ ** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
8045
+ ** This API function is used to query the FTS table for phrase iPhrase
8046
+ ** of the current query. Specifically, a query equivalent to:
8047
+ **
8048
+ ** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid
8049
+ **
8050
+ ** with $p set to a phrase equivalent to the phrase iPhrase of the
8051
+ ** current query is executed. For each row visited, the callback function
8052
+ ** passed as the fourth argument is invoked. The context and API objects
8053
+ ** passed to the callback function may be used to access the properties of
8054
+ ** each matched row. Invoking Api.xUserData() returns a copy of the pointer
8055
+ ** passed as the third argument to pUserData.
8056
+ **
8057
+ ** If the callback function returns any value other than SQLITE_OK, the
8058
+ ** query is abandoned and the xQueryPhrase function returns immediately.
8059
+ ** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK.
8060
+ ** Otherwise, the error code is propagated upwards.
8061
+ **
8062
+ ** If the query runs to completion without incident, SQLITE_OK is returned.
8063
+ ** Or, if some error occurs before the query completes or is aborted by
8064
+ ** the callback, an SQLite error code is returned.
8065
+ **
8066
+ **
8067
+ ** xSetAuxdata(pFts5, pAux, xDelete)
8068
+ **
8069
+ ** Save the pointer passed as the second argument as the extension functions
8070
+ ** "auxiliary data". The pointer may then be retrieved by the current or any
8071
+ ** future invocation of the same fts5 extension function made as part of
8072
+ ** the same MATCH query using the xGetAuxdata() API.
8073
+ **
8074
+ ** Each extension function is allocated a single auxiliary data slot for
8075
+ ** each FTS query (MATCH expression). If the extension function is invoked
8076
+ ** more than once for a single FTS query, then all invocations share a
8077
+ ** single auxiliary data context.
8078
+ **
8079
+ ** If there is already an auxiliary data pointer when this function is
8080
+ ** invoked, then it is replaced by the new pointer. If an xDelete callback
8081
+ ** was specified along with the original pointer, it is invoked at this
8082
+ ** point.
8083
+ **
8084
+ ** The xDelete callback, if one is specified, is also invoked on the
8085
+ ** auxiliary data pointer after the FTS5 query has finished.
8086
+ **
8087
+ ** If an error (e.g. an OOM condition) occurs within this function,
8088
+ ** the auxiliary data is set to NULL and an error code returned. If the
8089
+ ** xDelete parameter was not NULL, it is invoked on the auxiliary data
8090
+ ** pointer before returning.
8091
+ **
8092
+ **
8093
+ ** xGetAuxdata(pFts5, bClear)
8094
+ **
8095
+ ** Returns the current auxiliary data pointer for the fts5 extension
8096
+ ** function. See the xSetAuxdata() method for details.
8097
+ **
8098
+ ** If the bClear argument is non-zero, then the auxiliary data is cleared
8099
+ ** (set to NULL) before this function returns. In this case the xDelete,
8100
+ ** if any, is not invoked.
8101
+ **
8102
+ **
8103
+ ** xRowCount(pFts5, pnRow)
8104
+ **
8105
+ ** This function is used to retrieve the total number of rows in the table.
8106
+ ** In other words, the same value that would be returned by:
8107
+ **
8108
+ ** SELECT count(*) FROM ftstable;
8109
+ **
8110
+ ** xPhraseFirst()
8111
+ ** This function is used, along with type Fts5PhraseIter and the xPhraseNext
8112
+ ** method, to iterate through all instances of a single query phrase within
8113
+ ** the current row. This is the same information as is accessible via the
8114
+ ** xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient
8115
+ ** to use, this API may be faster under some circumstances. To iterate
8116
+ ** through instances of phrase iPhrase, use the following code:
8117
+ **
8118
+ ** Fts5PhraseIter iter;
8119
+ ** int iCol, iOff;
8120
+ ** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
8121
+ ** iOff>=0;
8122
+ ** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
8123
+ ** ){
8124
+ ** // An instance of phrase iPhrase at offset iOff of column iCol
8125
+ ** }
8126
+ **
8127
+ ** The Fts5PhraseIter structure is defined above. Applications should not
8128
+ ** modify this structure directly - it should only be used as shown above
8129
+ ** with the xPhraseFirst() and xPhraseNext() API methods.
8130
+ **
8131
+ ** xPhraseNext()
8132
+ ** See xPhraseFirst above.
8133
+ */
8134
+ struct Fts5ExtensionApi {
8135
+ int iVersion; /* Currently always set to 1 */
8136
+
8137
+ void *(*xUserData)(Fts5Context*);
8138
+
8139
+ int (*xColumnCount)(Fts5Context*);
8140
+ int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
8141
+ int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);
8142
+
8143
+ int (*xTokenize)(Fts5Context*,
8144
+ const char *pText, int nText, /* Text to tokenize */
8145
+ void *pCtx, /* Context passed to xToken() */
8146
+ int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
8147
+ );
8148
+
8149
+ int (*xPhraseCount)(Fts5Context*);
8150
+ int (*xPhraseSize)(Fts5Context*, int iPhrase);
8151
+
8152
+ int (*xInstCount)(Fts5Context*, int *pnInst);
8153
+ int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);
8154
+
8155
+ sqlite3_int64 (*xRowid)(Fts5Context*);
8156
+ int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);
8157
+ int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);
8158
+
8159
+ int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData,
8160
+ int(*)(const Fts5ExtensionApi*,Fts5Context*,void*)
8161
+ );
8162
+ int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*));
8163
+ void *(*xGetAuxdata)(Fts5Context*, int bClear);
8164
+
8165
+ void (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*);
8166
+ void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff);
8167
+ };
8168
+
8169
+ /*
8170
+ ** CUSTOM AUXILIARY FUNCTIONS
8171
+ *************************************************************************/
8172
+
8173
+ /*************************************************************************
8174
+ ** CUSTOM TOKENIZERS
8175
+ **
8176
+ ** Applications may also register custom tokenizer types. A tokenizer
8177
+ ** is registered by providing fts5 with a populated instance of the
8178
+ ** following structure. All structure methods must be defined, setting
8179
+ ** any member of the fts5_tokenizer struct to NULL leads to undefined
8180
+ ** behaviour. The structure methods are expected to function as follows:
8181
+ **
8182
+ ** xCreate:
8183
+ ** This function is used to allocate and initialize a tokenizer instance.
8184
+ ** A tokenizer instance is required to actually tokenize text.
8185
+ **
8186
+ ** The first argument passed to this function is a copy of the (void*)
8187
+ ** pointer provided by the application when the fts5_tokenizer object
8188
+ ** was registered with FTS5 (the third argument to xCreateTokenizer()).
8189
+ ** The second and third arguments are an array of nul-terminated strings
8190
+ ** containing the tokenizer arguments, if any, specified following the
8191
+ ** tokenizer name as part of the CREATE VIRTUAL TABLE statement used
8192
+ ** to create the FTS5 table.
8193
+ **
8194
+ ** The final argument is an output variable. If successful, (*ppOut)
8195
+ ** should be set to point to the new tokenizer handle and SQLITE_OK
8196
+ ** returned. If an error occurs, some value other than SQLITE_OK should
8197
+ ** be returned. In this case, fts5 assumes that the final value of *ppOut
8198
+ ** is undefined.
8199
+ **
8200
+ ** xDelete:
8201
+ ** This function is invoked to delete a tokenizer handle previously
8202
+ ** allocated using xCreate(). Fts5 guarantees that this function will
8203
+ ** be invoked exactly once for each successful call to xCreate().
8204
+ **
8205
+ ** xTokenize:
8206
+ ** This function is expected to tokenize the nText byte string indicated
8207
+ ** by argument pText. pText may or may not be nul-terminated. The first
8208
+ ** argument passed to this function is a pointer to an Fts5Tokenizer object
8209
+ ** returned by an earlier call to xCreate().
8210
+ **
8211
+ ** The third argument indicates the reason that FTS5 is requesting
8212
+ ** tokenization of the supplied text. This is always one of the following
8213
+ ** four values:
8214
+ **
8215
+ ** <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into
8216
+ ** or removed from the FTS table. The tokenizer is being invoked to
8217
+ ** determine the set of tokens to add to (or delete from) the
8218
+ ** FTS index.
8219
+ **
8220
+ ** <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed
8221
+ ** against the FTS index. The tokenizer is being called to tokenize
8222
+ ** a bareword or quoted string specified as part of the query.
8223
+ **
8224
+ ** <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as
8225
+ ** FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is
8226
+ ** followed by a "*" character, indicating that the last token
8227
+ ** returned by the tokenizer will be treated as a token prefix.
8228
+ **
8229
+ ** <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to
8230
+ ** satisfy an Fts5ExtensionApi.xTokenize() request made by an auxiliary
8231
+ ** function. Or an Fts5ExtensionApi.xColumnSize() request made by the same
8232
+ ** on a columnsize=0 database.
8233
+ ** </ul>
8234
+ **
8235
+ ** For each token in the input string, the supplied callback xToken() must
8236
+ ** be invoked. The first argument to it should be a copy of the pointer
8237
+ ** passed as the second argument to xTokenize(). The third and fourth
8238
+ ** arguments are a pointer to a buffer containing the token text, and the
8239
+ ** size of the token in bytes. The 5th and 6th arguments are the byte offsets
8240
+ ** of the first byte of and first byte immediately following the text from
8241
+ ** which the token is derived within the input.
8242
+ **
8243
+ ** The second argument passed to the xToken() callback ("tflags") should
8244
+ ** normally be set to 0. The exception is if the tokenizer supports
8245
+ ** synonyms. In this case see the discussion below for details.
8246
+ **
8247
+ ** FTS5 assumes the xToken() callback is invoked for each token in the
8248
+ ** order that they occur within the input text.
8249
+ **
8250
+ ** If an xToken() callback returns any value other than SQLITE_OK, then
8251
+ ** the tokenization should be abandoned and the xTokenize() method should
8252
+ ** immediately return a copy of the xToken() return value. Or, if the
8253
+ ** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
8254
+ ** if an error occurs with the xTokenize() implementation itself, it
8255
+ ** may abandon the tokenization and return any error code other than
8256
+ ** SQLITE_OK or SQLITE_DONE.
8257
+ **
8258
+ ** SYNONYM SUPPORT
8259
+ **
8260
+ ** Custom tokenizers may also support synonyms. Consider a case in which a
8261
+ ** user wishes to query for a phrase such as "first place". Using the
8262
+ ** built-in tokenizers, the FTS5 query 'first + place' will match instances
8263
+ ** of "first place" within the document set, but not alternative forms
8264
+ ** such as "1st place". In some applications, it would be better to match
8265
+ ** all instances of "first place" or "1st place" regardless of which form
8266
+ ** the user specified in the MATCH query text.
8267
+ **
8268
+ ** There are several ways to approach this in FTS5:
8269
+ **
8270
+ ** <ol><li> By mapping all synonyms to a single token.
8271
+ ** In the above example, this means that the tokenizer returns the
8272
+ ** same token for inputs "first" and "1st". Say that token is in
8273
+ ** fact "first", so that when the user inserts the document "I won
8274
+ ** 1st place" entries are added to the index for tokens "i", "won",
8275
+ ** "first" and "place". If the user then queries for '1st + place',
8276
+ ** the tokenizer substitutes "first" for "1st" and the query works
8277
+ ** as expected.
8278
+ **
8279
+ ** <li> By querying the index for all synonyms of each query term separately.
8280
+ ** In this case, when tokenizing query text, the tokenizer may
8281
+ ** provide multiple synonyms for a single term within the document.
8282
+ ** FTS5 then queries the index for each synonym individually. For
8283
+ ** example, faced with the query:
8284
+ **
8285
+ ** <codeblock>
8286
+ ** ... MATCH 'first place'</codeblock>
8287
+ **
8288
+ ** the tokenizer offers both "1st" and "first" as synonyms for the
8289
+ ** first token in the MATCH query and FTS5 effectively runs a query
8290
+ ** similar to:
8291
+ **
8292
+ ** <codeblock>
8293
+ ** ... MATCH '(first OR 1st) place'</codeblock>
8294
+ **
8295
+ ** except that, for the purposes of auxiliary functions, the query
8296
+ ** still appears to contain just two phrases - "(first OR 1st)"
8297
+ ** being treated as a single phrase.
8298
+ **
8299
+ ** <li> By adding multiple synonyms for a single term to the FTS index.
8300
+ ** Using this method, when tokenizing document text, the tokenizer
8301
+ ** provides multiple synonyms for each token. So that when a
8302
+ ** document such as "I won first place" is tokenized, entries are
8303
+ ** added to the FTS index for "i", "won", "first", "1st" and
8304
+ ** "place".
8305
+ **
8306
+ ** This way, even if the tokenizer does not provide synonyms
8307
+ ** when tokenizing query text (it should not - to do so would be
8308
+ ** inefficient), it doesn't matter if the user queries for
8309
+ ** 'first + place' or '1st + place', as there are entries in the
8310
+ ** FTS index corresponding to both forms of the first token.
8311
+ ** </ol>
8312
+ **
8313
+ ** Whether it is parsing document or query text, any call to xToken that
8314
+ ** specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit
8315
+ ** is considered to supply a synonym for the previous token. For example,
8316
+ ** when parsing the document "I won first place", a tokenizer that supports
8317
+ ** synonyms would call xToken() 5 times, as follows:
8318
+ **
8319
+ ** <codeblock>
8320
+ ** xToken(pCtx, 0, "i", 1, 0, 1);
8321
+ ** xToken(pCtx, 0, "won", 3, 2, 5);
8322
+ ** xToken(pCtx, 0, "first", 5, 6, 11);
8323
+ ** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11);
8324
+ ** xToken(pCtx, 0, "place", 5, 12, 17);
8325
+ **</codeblock>
8326
+ **
8327
+ ** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time
8328
+ ** xToken() is called. Multiple synonyms may be specified for a single token
8329
+ ** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence.
8330
+ ** There is no limit to the number of synonyms that may be provided for a
8331
+ ** single token.
8332
+ **
8333
+ ** In many cases, method (1) above is the best approach. It does not add
8334
+ ** extra data to the FTS index or require FTS5 to query for multiple terms,
8335
+ ** so it is efficient in terms of disk space and query speed. However, it
8336
+ ** does not support prefix queries very well. If, as suggested above, the
8337
+ ** token "first" is substituted for "1st" by the tokenizer, then the query:
8338
+ **
8339
+ ** <codeblock>
8340
+ ** ... MATCH '1s*'</codeblock>
8341
+ **
8342
+ ** will not match documents that contain the token "1st" (as the tokenizer
8343
+ ** will probably not map "1s" to any prefix of "first").
8344
+ **
8345
+ ** For full prefix support, method (3) may be preferred. In this case,
8346
+ ** because the index contains entries for both "first" and "1st", prefix
8347
+ ** queries such as 'fi*' or '1s*' will match correctly. However, because
8348
+ ** extra entries are added to the FTS index, this method uses more space
8349
+ ** within the database.
8350
+ **
8351
+ ** Method (2) offers a midpoint between (1) and (3). Using this method,
8352
+ ** a query such as '1s*' will match documents that contain the literal
8353
+ ** token "1st", but not "first" (assuming the tokenizer is not able to
8354
+ ** provide synonyms for prefixes). However, a non-prefix query like '1st'
8355
+ ** will match against "1st" and "first". This method does not require
8356
+ ** extra disk space, as no extra entries are added to the FTS index.
8357
+ ** On the other hand, it may require more CPU cycles to run MATCH queries,
8358
+ ** as separate queries of the FTS index are required for each synonym.
8359
+ **
8360
+ ** When using methods (2) or (3), it is important that the tokenizer only
8361
+ ** provide synonyms when tokenizing document text (method (3)) or query
8362
+ ** text (method (2)), not both. Doing so will not cause any errors, but is
8363
+ ** inefficient.
8364
+ */
8365
+ typedef struct Fts5Tokenizer Fts5Tokenizer;
8366
+ typedef struct fts5_tokenizer fts5_tokenizer;
8367
+ struct fts5_tokenizer {
8368
+ int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
8369
+ void (*xDelete)(Fts5Tokenizer*);
8370
+ int (*xTokenize)(Fts5Tokenizer*,
8371
+ void *pCtx,
8372
+ int flags, /* Mask of FTS5_TOKENIZE_* flags */
8373
+ const char *pText, int nText,
8374
+ int (*xToken)(
8375
+ void *pCtx, /* Copy of 2nd argument to xTokenize() */
8376
+ int tflags, /* Mask of FTS5_TOKEN_* flags */
8377
+ const char *pToken, /* Pointer to buffer containing token */
8378
+ int nToken, /* Size of token in bytes */
8379
+ int iStart, /* Byte offset of token within input text */
8380
+ int iEnd /* Byte offset of end of token within input text */
8381
+ )
8382
+ );
8383
+ };
8384
+
8385
+ /* Flags that may be passed as the third argument to xTokenize() */
8386
+ #define FTS5_TOKENIZE_QUERY 0x0001
8387
+ #define FTS5_TOKENIZE_PREFIX 0x0002
8388
+ #define FTS5_TOKENIZE_DOCUMENT 0x0004
8389
+ #define FTS5_TOKENIZE_AUX 0x0008
8390
+
8391
+ /* Flags that may be passed by the tokenizer implementation back to FTS5
8392
+ ** as the second argument to the supplied xToken callback. */
8393
+ #define FTS5_TOKEN_COLOCATED 0x0001 /* Same position as prev. token */
8394
+
8395
+ /*
8396
+ ** END OF CUSTOM TOKENIZERS
8397
+ *************************************************************************/
8398
+
8399
+ /*************************************************************************
8400
+ ** FTS5 EXTENSION REGISTRATION API
8401
+ */
8402
+ typedef struct fts5_api fts5_api;
8403
+ struct fts5_api {
8404
+ int iVersion; /* Currently always set to 2 */
8405
+
8406
+ /* Create a new tokenizer */
8407
+ int (*xCreateTokenizer)(
8408
+ fts5_api *pApi,
8409
+ const char *zName,
8410
+ void *pContext,
8411
+ fts5_tokenizer *pTokenizer,
8412
+ void (*xDestroy)(void*)
8413
+ );
8414
+
8415
+ /* Find an existing tokenizer */
8416
+ int (*xFindTokenizer)(
8417
+ fts5_api *pApi,
8418
+ const char *zName,
8419
+ void **ppContext,
8420
+ fts5_tokenizer *pTokenizer
8421
+ );
8422
+
8423
+ /* Create a new auxiliary function */
8424
+ int (*xCreateFunction)(
8425
+ fts5_api *pApi,
8426
+ const char *zName,
8427
+ void *pContext,
8428
+ fts5_extension_function xFunction,
8429
+ void (*xDestroy)(void*)
8430
+ );
8431
+ };
8432
+
8433
+ /*
8434
+ ** END OF REGISTRATION API
8435
+ *************************************************************************/
8436
+
8437
+ #ifdef __cplusplus
8438
+ } /* end of the 'extern "C"' block */
8439
+ #endif
8440
+
8441
+ #endif /* _FTS5_H */
8442
+
8443
+