amalgalite 1.4.1 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -111,9 +111,9 @@ extern "C" {
111
111
  ** [sqlite3_libversion_number()], [sqlite3_sourceid()],
112
112
  ** [sqlite_version()] and [sqlite_source_id()].
113
113
  */
114
- #define SQLITE_VERSION "3.8.11.1"
115
- #define SQLITE_VERSION_NUMBER 3008011
116
- #define SQLITE_SOURCE_ID "2015-07-29 20:00:57 cf538e2783e468bbc25e7cb2a9ee64d3e0e80b2f"
114
+ #define SQLITE_VERSION "3.9.2"
115
+ #define SQLITE_VERSION_NUMBER 3009002
116
+ #define SQLITE_SOURCE_ID "2015-11-02 18:31:45 bda77dda9697c463c3d0704014d51627fceee328"
117
117
 
118
118
  /*
119
119
  ** CAPI3REF: Run-Time Library Version Numbers
@@ -124,7 +124,7 @@ extern "C" {
124
124
  ** but are associated with the library instead of the header file. ^(Cautious
125
125
  ** programmers might include assert() statements in their application to
126
126
  ** verify that values returned by these interfaces match the macros in
127
- ** the header, and thus insure that the application is
127
+ ** the header, and thus ensure that the application is
128
128
  ** compiled with matching library and header files.
129
129
  **
130
130
  ** <blockquote><pre>
@@ -374,7 +374,7 @@ typedef int (*sqlite3_callback)(void*,int,char**, char**);
374
374
  ** Restrictions:
375
375
  **
376
376
  ** <ul>
377
- ** <li> The application must insure that the 1st parameter to sqlite3_exec()
377
+ ** <li> The application must ensure that the 1st parameter to sqlite3_exec()
378
378
  ** is a valid and open [database connection].
379
379
  ** <li> The application must not close the [database connection] specified by
380
380
  ** the 1st parameter to sqlite3_exec() while sqlite3_exec() is running.
@@ -477,6 +477,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_exec(
477
477
  #define SQLITE_IOERR_MMAP (SQLITE_IOERR | (24<<8))
478
478
  #define SQLITE_IOERR_GETTEMPPATH (SQLITE_IOERR | (25<<8))
479
479
  #define SQLITE_IOERR_CONVPATH (SQLITE_IOERR | (26<<8))
480
+ #define SQLITE_IOERR_VNODE (SQLITE_IOERR | (27<<8))
480
481
  #define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8))
481
482
  #define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8))
482
483
  #define SQLITE_BUSY_SNAPSHOT (SQLITE_BUSY | (2<<8))
@@ -1366,9 +1367,11 @@ SQLITE_API int SQLITE_STDCALL sqlite3_os_end(void);
1366
1367
  ** applications and so this routine is usually not necessary. It is
1367
1368
  ** provided to support rare applications with unusual needs.
1368
1369
  **
1369
- ** The sqlite3_config() interface is not threadsafe. The application
1370
- ** must insure that no other SQLite interfaces are invoked by other
1371
- ** threads while sqlite3_config() is running. Furthermore, sqlite3_config()
1370
+ ** <b>The sqlite3_config() interface is not threadsafe. The application
1371
+ ** must ensure that no other SQLite interfaces are invoked by other
1372
+ ** threads while sqlite3_config() is running.</b>
1373
+ **
1374
+ ** The sqlite3_config() interface
1372
1375
  ** may only be invoked prior to library initialization using
1373
1376
  ** [sqlite3_initialize()] or after shutdown by [sqlite3_shutdown()].
1374
1377
  ** ^If sqlite3_config() is called after [sqlite3_initialize()] and before
@@ -3373,7 +3376,8 @@ SQLITE_API int SQLITE_STDCALL sqlite3_stmt_readonly(sqlite3_stmt *pStmt);
3373
3376
  **
3374
3377
  ** ^The sqlite3_stmt_busy(S) interface returns true (non-zero) if the
3375
3378
  ** [prepared statement] S has been stepped at least once using
3376
- ** [sqlite3_step(S)] but has not run to completion and/or has not
3379
+ ** [sqlite3_step(S)] but has neither run to completion (returned
3380
+ ** [SQLITE_DONE] from [sqlite3_step(S)]) nor
3377
3381
  ** been reset using [sqlite3_reset(S)]. ^The sqlite3_stmt_busy(S)
3378
3382
  ** interface returns false if S is a NULL pointer. If S is not a
3379
3383
  ** NULL pointer and is not a pointer to a valid [prepared statement]
@@ -3626,7 +3630,7 @@ SQLITE_API const char *SQLITE_STDCALL sqlite3_bind_parameter_name(sqlite3_stmt*,
3626
3630
  **
3627
3631
  ** See also: [sqlite3_bind_blob|sqlite3_bind()],
3628
3632
  ** [sqlite3_bind_parameter_count()], and
3629
- ** [sqlite3_bind_parameter_index()].
3633
+ ** [sqlite3_bind_parameter_name()].
3630
3634
  */
3631
3635
  SQLITE_API int SQLITE_STDCALL sqlite3_bind_parameter_index(sqlite3_stmt*, const char *zName);
3632
3636
 
@@ -4355,6 +4359,22 @@ SQLITE_API const void *SQLITE_STDCALL sqlite3_value_text16be(sqlite3_value*);
4355
4359
  SQLITE_API int SQLITE_STDCALL sqlite3_value_type(sqlite3_value*);
4356
4360
  SQLITE_API int SQLITE_STDCALL sqlite3_value_numeric_type(sqlite3_value*);
4357
4361
 
4362
+ /*
4363
+ ** CAPI3REF: Finding The Subtype Of SQL Values
4364
+ ** METHOD: sqlite3_value
4365
+ **
4366
+ ** The sqlite3_value_subtype(V) function returns the subtype for
4367
+ ** an [application-defined SQL function] argument V. The subtype
4368
+ ** information can be used to pass a limited amount of context from
4369
+ ** one SQL function to another. Use the [sqlite3_result_subtype()]
4370
+ ** routine to set the subtype for the return value of an SQL function.
4371
+ **
4372
+ ** SQLite makes no use of subtype itself. It merely passes the subtype
4373
+ ** from the result of one [application-defined SQL function] into the
4374
+ ** input of another.
4375
+ */
4376
+ SQLITE_API unsigned int SQLITE_STDCALL sqlite3_value_subtype(sqlite3_value*);
4377
+
4358
4378
  /*
4359
4379
  ** CAPI3REF: Copy And Free SQL Values
4360
4380
  ** METHOD: sqlite3_value
@@ -4654,6 +4674,21 @@ SQLITE_API void SQLITE_STDCALL sqlite3_result_value(sqlite3_context*, sqlite3_va
4654
4674
  SQLITE_API void SQLITE_STDCALL sqlite3_result_zeroblob(sqlite3_context*, int n);
4655
4675
  SQLITE_API int SQLITE_STDCALL sqlite3_result_zeroblob64(sqlite3_context*, sqlite3_uint64 n);
4656
4676
 
4677
+
4678
+ /*
4679
+ ** CAPI3REF: Setting The Subtype Of An SQL Function
4680
+ ** METHOD: sqlite3_context
4681
+ **
4682
+ ** The sqlite3_result_subtype(C,T) function causes the subtype of
4683
+ ** the result from the [application-defined SQL function] with
4684
+ ** [sqlite3_context] C to be the value T. Only the lower 8 bits
4685
+ ** of the subtype T are preserved in current versions of SQLite;
4686
+ ** higher order bits are discarded.
4687
+ ** The number of subtype bytes preserved by SQLite might increase
4688
+ ** in future releases of SQLite.
4689
+ */
4690
+ SQLITE_API void SQLITE_STDCALL sqlite3_result_subtype(sqlite3_context*,unsigned int);
4691
+
4657
4692
  /*
4658
4693
  ** CAPI3REF: Define New Collating Sequences
4659
4694
  ** METHOD: sqlite3
@@ -5599,13 +5634,31 @@ struct sqlite3_module {
5599
5634
  ** ^The estimatedRows value is an estimate of the number of rows that
5600
5635
  ** will be returned by the strategy.
5601
5636
  **
5637
+ ** The xBestIndex method may optionally populate the idxFlags field with a
5638
+ ** mask of SQLITE_INDEX_SCAN_* flags. Currently there is only one such flag -
5639
+ ** SQLITE_INDEX_SCAN_UNIQUE. If the xBestIndex method sets this flag, SQLite
5640
+ ** assumes that the strategy may visit at most one row.
5641
+ **
5642
+ ** Additionally, if xBestIndex sets the SQLITE_INDEX_SCAN_UNIQUE flag, then
5643
+ ** SQLite also assumes that if a call to the xUpdate() method is made as
5644
+ ** part of the same statement to delete or update a virtual table row and the
5645
+ ** implementation returns SQLITE_CONSTRAINT, then there is no need to rollback
5646
+ ** any database changes. In other words, if the xUpdate() returns
5647
+ ** SQLITE_CONSTRAINT, the database contents must be exactly as they were
5648
+ ** before xUpdate was called. By contrast, if SQLITE_INDEX_SCAN_UNIQUE is not
5649
+ ** set and xUpdate returns SQLITE_CONSTRAINT, any database changes made by
5650
+ ** the xUpdate method are automatically rolled back by SQLite.
5651
+ **
5602
5652
  ** IMPORTANT: The estimatedRows field was added to the sqlite3_index_info
5603
5653
  ** structure for SQLite version 3.8.2. If a virtual table extension is
5604
5654
  ** used with an SQLite version earlier than 3.8.2, the results of attempting
5605
5655
  ** to read or write the estimatedRows field are undefined (but are likely
5606
5656
  ** to include crashing the application). The estimatedRows field should
5607
5657
  ** therefore only be used if [sqlite3_libversion_number()] returns a
5608
- ** value greater than or equal to 3008002.
5658
+ ** value greater than or equal to 3008002. Similarly, the idxFlags field
5659
+ ** was added for version 3.9.0. It may therefore only be used if
5660
+ ** sqlite3_libversion_number() returns a value greater than or equal to
5661
+ ** 3009000.
5609
5662
  */
5610
5663
  struct sqlite3_index_info {
5611
5664
  /* Inputs */
@@ -5633,8 +5686,15 @@ struct sqlite3_index_info {
5633
5686
  double estimatedCost; /* Estimated cost of using this index */
5634
5687
  /* Fields below are only available in SQLite 3.8.2 and later */
5635
5688
  sqlite3_int64 estimatedRows; /* Estimated number of rows returned */
5689
+ /* Fields below are only available in SQLite 3.9.0 and later */
5690
+ int idxFlags; /* Mask of SQLITE_INDEX_SCAN_* flags */
5636
5691
  };
5637
5692
 
5693
+ /*
5694
+ ** CAPI3REF: Virtual Table Scan Flags
5695
+ */
5696
+ #define SQLITE_INDEX_SCAN_UNIQUE 1 /* Scan visits at most 1 row */
5697
+
5638
5698
  /*
5639
5699
  ** CAPI3REF: Virtual Table Constraint Operator Codes
5640
5700
  **
@@ -6092,6 +6152,9 @@ SQLITE_API int SQLITE_STDCALL sqlite3_vfs_unregister(sqlite3_vfs*);
6092
6152
  ** <li> SQLITE_MUTEX_STATIC_APP1
6093
6153
  ** <li> SQLITE_MUTEX_STATIC_APP2
6094
6154
  ** <li> SQLITE_MUTEX_STATIC_APP3
6155
+ ** <li> SQLITE_MUTEX_STATIC_VFS1
6156
+ ** <li> SQLITE_MUTEX_STATIC_VFS2
6157
+ ** <li> SQLITE_MUTEX_STATIC_VFS3
6095
6158
  ** </ul>
6096
6159
  **
6097
6160
  ** ^The first two constants (SQLITE_MUTEX_FAST and SQLITE_MUTEX_RECURSIVE)
@@ -7858,3 +7921,523 @@ struct sqlite3_rtree_query_info {
7858
7921
 
7859
7922
  #endif /* ifndef _SQLITE3RTREE_H_ */
7860
7923
 
7924
+ /*
7925
+ ** 2014 May 31
7926
+ **
7927
+ ** The author disclaims copyright to this source code. In place of
7928
+ ** a legal notice, here is a blessing:
7929
+ **
7930
+ ** May you do good and not evil.
7931
+ ** May you find forgiveness for yourself and forgive others.
7932
+ ** May you share freely, never taking more than you give.
7933
+ **
7934
+ ******************************************************************************
7935
+ **
7936
+ ** Interfaces to extend FTS5. Using the interfaces defined in this file,
7937
+ ** FTS5 may be extended with:
7938
+ **
7939
+ ** * custom tokenizers, and
7940
+ ** * custom auxiliary functions.
7941
+ */
7942
+
7943
+
7944
+ #ifndef _FTS5_H
7945
+ #define _FTS5_H
7946
+
7947
+
7948
+ #ifdef __cplusplus
7949
+ extern "C" {
7950
+ #endif
7951
+
7952
+ /*************************************************************************
7953
+ ** CUSTOM AUXILIARY FUNCTIONS
7954
+ **
7955
+ ** Virtual table implementations may overload SQL functions by implementing
7956
+ ** the sqlite3_module.xFindFunction() method.
7957
+ */
7958
+
7959
+ typedef struct Fts5ExtensionApi Fts5ExtensionApi;
7960
+ typedef struct Fts5Context Fts5Context;
7961
+ typedef struct Fts5PhraseIter Fts5PhraseIter;
7962
+
7963
+ typedef void (*fts5_extension_function)(
7964
+ const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
7965
+ Fts5Context *pFts, /* First arg to pass to pApi functions */
7966
+ sqlite3_context *pCtx, /* Context for returning result/error */
7967
+ int nVal, /* Number of values in apVal[] array */
7968
+ sqlite3_value **apVal /* Array of trailing arguments */
7969
+ );
7970
+
7971
+ struct Fts5PhraseIter {
7972
+ const unsigned char *a;
7973
+ const unsigned char *b;
7974
+ };
7975
+
7976
+ /*
7977
+ ** EXTENSION API FUNCTIONS
7978
+ **
7979
+ ** xUserData(pFts):
7980
+ ** Return a copy of the context pointer the extension function was
7981
+ ** registered with.
7982
+ **
7983
+ ** xColumnTotalSize(pFts, iCol, pnToken):
7984
+ ** If parameter iCol is less than zero, set output variable *pnToken
7985
+ ** to the total number of tokens in the FTS5 table. Or, if iCol is
7986
+ ** non-negative but less than the number of columns in the table, return
7987
+ ** the total number of tokens in column iCol, considering all rows in
7988
+ ** the FTS5 table.
7989
+ **
7990
+ ** If parameter iCol is greater than or equal to the number of columns
7991
+ ** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
7992
+ ** an OOM condition or IO error), an appropriate SQLite error code is
7993
+ ** returned.
7994
+ **
7995
+ ** xColumnCount(pFts):
7996
+ ** Return the number of columns in the table.
7997
+ **
7998
+ ** xColumnSize(pFts, iCol, pnToken):
7999
+ ** If parameter iCol is less than zero, set output variable *pnToken
8000
+ ** to the total number of tokens in the current row. Or, if iCol is
8001
+ ** non-negative but less than the number of columns in the table, set
8002
+ ** *pnToken to the number of tokens in column iCol of the current row.
8003
+ **
8004
+ ** If parameter iCol is greater than or equal to the number of columns
8005
+ ** in the table, SQLITE_RANGE is returned. Or, if an error occurs (e.g.
8006
+ ** an OOM condition or IO error), an appropriate SQLite error code is
8007
+ ** returned.
8008
+ **
8009
+ ** xColumnText:
8010
+ ** This function attempts to retrieve the text of column iCol of the
8011
+ ** current document. If successful, (*pz) is set to point to a buffer
8012
+ ** containing the text in utf-8 encoding, (*pn) is set to the size in bytes
8013
+ ** (not characters) of the buffer and SQLITE_OK is returned. Otherwise,
8014
+ ** if an error occurs, an SQLite error code is returned and the final values
8015
+ ** of (*pz) and (*pn) are undefined.
8016
+ **
8017
+ ** xPhraseCount:
8018
+ ** Returns the number of phrases in the current query expression.
8019
+ **
8020
+ ** xPhraseSize:
8021
+ ** Returns the number of tokens in phrase iPhrase of the query. Phrases
8022
+ ** are numbered starting from zero.
8023
+ **
8024
+ ** xInstCount:
8025
+ ** Set *pnInst to the total number of occurrences of all phrases within
8026
+ ** the query within the current row. Return SQLITE_OK if successful, or
8027
+ ** an error code (e.g. SQLITE_NOMEM) if an error occurs.
8028
+ **
8029
+ ** xInst:
8030
+ ** Query for the details of phrase match iIdx within the current row.
8031
+ ** Phrase matches are numbered starting from zero, so the iIdx argument
8032
+ ** should be greater than or equal to zero and smaller than the value
8033
+ ** output by xInstCount().
8034
+ **
8035
+ ** Returns SQLITE_OK if successful, or an error code (e.g. SQLITE_NOMEM)
8036
+ ** if an error occurs.
8037
+ **
8038
+ ** xRowid:
8039
+ ** Returns the rowid of the current row.
8040
+ **
8041
+ ** xTokenize:
8042
+ ** Tokenize text using the tokenizer belonging to the FTS5 table.
8043
+ **
8044
+ ** xQueryPhrase(pFts5, iPhrase, pUserData, xCallback):
8045
+ ** This API function is used to query the FTS table for phrase iPhrase
8046
+ ** of the current query. Specifically, a query equivalent to:
8047
+ **
8048
+ ** ... FROM ftstable WHERE ftstable MATCH $p ORDER BY rowid
8049
+ **
8050
+ ** with $p set to a phrase equivalent to the phrase iPhrase of the
8051
+ ** current query is executed. For each row visited, the callback function
8052
+ ** passed as the fourth argument is invoked. The context and API objects
8053
+ ** passed to the callback function may be used to access the properties of
8054
+ ** each matched row. Invoking Api.xUserData() returns a copy of the pointer
8055
+ ** passed as the third argument to pUserData.
8056
+ **
8057
+ ** If the callback function returns any value other than SQLITE_OK, the
8058
+ ** query is abandoned and the xQueryPhrase function returns immediately.
8059
+ ** If the returned value is SQLITE_DONE, xQueryPhrase returns SQLITE_OK.
8060
+ ** Otherwise, the error code is propagated upwards.
8061
+ **
8062
+ ** If the query runs to completion without incident, SQLITE_OK is returned.
8063
+ ** Or, if some error occurs before the query completes or is aborted by
8064
+ ** the callback, an SQLite error code is returned.
8065
+ **
8066
+ **
8067
+ ** xSetAuxdata(pFts5, pAux, xDelete)
8068
+ **
8069
+ ** Save the pointer passed as the second argument as the extension functions
8070
+ ** "auxiliary data". The pointer may then be retrieved by the current or any
8071
+ ** future invocation of the same fts5 extension function made as part of
8072
+ ** the same MATCH query using the xGetAuxdata() API.
8073
+ **
8074
+ ** Each extension function is allocated a single auxiliary data slot for
8075
+ ** each FTS query (MATCH expression). If the extension function is invoked
8076
+ ** more than once for a single FTS query, then all invocations share a
8077
+ ** single auxiliary data context.
8078
+ **
8079
+ ** If there is already an auxiliary data pointer when this function is
8080
+ ** invoked, then it is replaced by the new pointer. If an xDelete callback
8081
+ ** was specified along with the original pointer, it is invoked at this
8082
+ ** point.
8083
+ **
8084
+ ** The xDelete callback, if one is specified, is also invoked on the
8085
+ ** auxiliary data pointer after the FTS5 query has finished.
8086
+ **
8087
+ ** If an error (e.g. an OOM condition) occurs within this function,
8088
+ ** the auxiliary data is set to NULL and an error code returned. If the
8089
+ ** xDelete parameter was not NULL, it is invoked on the auxiliary data
8090
+ ** pointer before returning.
8091
+ **
8092
+ **
8093
+ ** xGetAuxdata(pFts5, bClear)
8094
+ **
8095
+ ** Returns the current auxiliary data pointer for the fts5 extension
8096
+ ** function. See the xSetAuxdata() method for details.
8097
+ **
8098
+ ** If the bClear argument is non-zero, then the auxiliary data is cleared
8099
+ ** (set to NULL) before this function returns. In this case the xDelete,
8100
+ ** if any, is not invoked.
8101
+ **
8102
+ **
8103
+ ** xRowCount(pFts5, pnRow)
8104
+ **
8105
+ ** This function is used to retrieve the total number of rows in the table.
8106
+ ** In other words, the same value that would be returned by:
8107
+ **
8108
+ ** SELECT count(*) FROM ftstable;
8109
+ **
8110
+ ** xPhraseFirst()
8111
+ ** This function is used, along with type Fts5PhraseIter and the xPhraseNext
8112
+ ** method, to iterate through all instances of a single query phrase within
8113
+ ** the current row. This is the same information as is accessible via the
8114
+ ** xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient
8115
+ ** to use, this API may be faster under some circumstances. To iterate
8116
+ ** through instances of phrase iPhrase, use the following code:
8117
+ **
8118
+ ** Fts5PhraseIter iter;
8119
+ ** int iCol, iOff;
8120
+ ** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
8121
+ ** iOff>=0;
8122
+ ** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
8123
+ ** ){
8124
+ ** // An instance of phrase iPhrase at offset iOff of column iCol
8125
+ ** }
8126
+ **
8127
+ ** The Fts5PhraseIter structure is defined above. Applications should not
8128
+ ** modify this structure directly - it should only be used as shown above
8129
+ ** with the xPhraseFirst() and xPhraseNext() API methods.
8130
+ **
8131
+ ** xPhraseNext()
8132
+ ** See xPhraseFirst above.
8133
+ */
8134
+ struct Fts5ExtensionApi {
8135
+ int iVersion; /* Currently always set to 1 */
8136
+
8137
+ void *(*xUserData)(Fts5Context*);
8138
+
8139
+ int (*xColumnCount)(Fts5Context*);
8140
+ int (*xRowCount)(Fts5Context*, sqlite3_int64 *pnRow);
8141
+ int (*xColumnTotalSize)(Fts5Context*, int iCol, sqlite3_int64 *pnToken);
8142
+
8143
+ int (*xTokenize)(Fts5Context*,
8144
+ const char *pText, int nText, /* Text to tokenize */
8145
+ void *pCtx, /* Context passed to xToken() */
8146
+ int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
8147
+ );
8148
+
8149
+ int (*xPhraseCount)(Fts5Context*);
8150
+ int (*xPhraseSize)(Fts5Context*, int iPhrase);
8151
+
8152
+ int (*xInstCount)(Fts5Context*, int *pnInst);
8153
+ int (*xInst)(Fts5Context*, int iIdx, int *piPhrase, int *piCol, int *piOff);
8154
+
8155
+ sqlite3_int64 (*xRowid)(Fts5Context*);
8156
+ int (*xColumnText)(Fts5Context*, int iCol, const char **pz, int *pn);
8157
+ int (*xColumnSize)(Fts5Context*, int iCol, int *pnToken);
8158
+
8159
+ int (*xQueryPhrase)(Fts5Context*, int iPhrase, void *pUserData,
8160
+ int(*)(const Fts5ExtensionApi*,Fts5Context*,void*)
8161
+ );
8162
+ int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*));
8163
+ void *(*xGetAuxdata)(Fts5Context*, int bClear);
8164
+
8165
+ void (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*);
8166
+ void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff);
8167
+ };
8168
+
8169
+ /*
8170
+ ** CUSTOM AUXILIARY FUNCTIONS
8171
+ *************************************************************************/
8172
+
8173
+ /*************************************************************************
8174
+ ** CUSTOM TOKENIZERS
8175
+ **
8176
+ ** Applications may also register custom tokenizer types. A tokenizer
8177
+ ** is registered by providing fts5 with a populated instance of the
8178
+ ** following structure. All structure methods must be defined, setting
8179
+ ** any member of the fts5_tokenizer struct to NULL leads to undefined
8180
+ ** behaviour. The structure methods are expected to function as follows:
8181
+ **
8182
+ ** xCreate:
8183
+ ** This function is used to allocate and initialize a tokenizer instance.
8184
+ ** A tokenizer instance is required to actually tokenize text.
8185
+ **
8186
+ ** The first argument passed to this function is a copy of the (void*)
8187
+ ** pointer provided by the application when the fts5_tokenizer object
8188
+ ** was registered with FTS5 (the third argument to xCreateTokenizer()).
8189
+ ** The second and third arguments are an array of nul-terminated strings
8190
+ ** containing the tokenizer arguments, if any, specified following the
8191
+ ** tokenizer name as part of the CREATE VIRTUAL TABLE statement used
8192
+ ** to create the FTS5 table.
8193
+ **
8194
+ ** The final argument is an output variable. If successful, (*ppOut)
8195
+ ** should be set to point to the new tokenizer handle and SQLITE_OK
8196
+ ** returned. If an error occurs, some value other than SQLITE_OK should
8197
+ ** be returned. In this case, fts5 assumes that the final value of *ppOut
8198
+ ** is undefined.
8199
+ **
8200
+ ** xDelete:
8201
+ ** This function is invoked to delete a tokenizer handle previously
8202
+ ** allocated using xCreate(). Fts5 guarantees that this function will
8203
+ ** be invoked exactly once for each successful call to xCreate().
8204
+ **
8205
+ ** xTokenize:
8206
+ ** This function is expected to tokenize the nText byte string indicated
8207
+ ** by argument pText. pText may or may not be nul-terminated. The first
8208
+ ** argument passed to this function is a pointer to an Fts5Tokenizer object
8209
+ ** returned by an earlier call to xCreate().
8210
+ **
8211
+ ** The third argument indicates the reason that FTS5 is requesting
8212
+ ** tokenization of the supplied text. This is always one of the following
8213
+ ** four values:
8214
+ **
8215
+ ** <ul><li> <b>FTS5_TOKENIZE_DOCUMENT</b> - A document is being inserted into
8216
+ ** or removed from the FTS table. The tokenizer is being invoked to
8217
+ ** determine the set of tokens to add to (or delete from) the
8218
+ ** FTS index.
8219
+ **
8220
+ ** <li> <b>FTS5_TOKENIZE_QUERY</b> - A MATCH query is being executed
8221
+ ** against the FTS index. The tokenizer is being called to tokenize
8222
+ ** a bareword or quoted string specified as part of the query.
8223
+ **
8224
+ ** <li> <b>(FTS5_TOKENIZE_QUERY | FTS5_TOKENIZE_PREFIX)</b> - Same as
8225
+ ** FTS5_TOKENIZE_QUERY, except that the bareword or quoted string is
8226
+ ** followed by a "*" character, indicating that the last token
8227
+ ** returned by the tokenizer will be treated as a token prefix.
8228
+ **
8229
+ ** <li> <b>FTS5_TOKENIZE_AUX</b> - The tokenizer is being invoked to
8230
+ ** satisfy an fts5_api.xTokenize() request made by an auxiliary
8231
+ ** function. Or an fts5_api.xColumnSize() request made by the same
8232
+ ** on a columnsize=0 database.
8233
+ ** </ul>
8234
+ **
8235
+ ** For each token in the input string, the supplied callback xToken() must
8236
+ ** be invoked. The first argument to it should be a copy of the pointer
8237
+ ** passed as the second argument to xTokenize(). The third and fourth
8238
+ ** arguments are a pointer to a buffer containing the token text, and the
8239
+ ** size of the token in bytes. The 5th and 6th arguments are the byte offsets
8240
+ ** of the first byte of and first byte immediately following the text from
8241
+ ** which the token is derived within the input.
8242
+ **
8243
+ ** The second argument passed to the xToken() callback ("tflags") should
8244
+ ** normally be set to 0. The exception is if the tokenizer supports
8245
+ ** synonyms. In this case see the discussion below for details.
8246
+ **
8247
+ ** FTS5 assumes the xToken() callback is invoked for each token in the
8248
+ ** order that they occur within the input text.
8249
+ **
8250
+ ** If an xToken() callback returns any value other than SQLITE_OK, then
8251
+ ** the tokenization should be abandoned and the xTokenize() method should
8252
+ ** immediately return a copy of the xToken() return value. Or, if the
8253
+ ** input buffer is exhausted, xTokenize() should return SQLITE_OK. Finally,
8254
+ ** if an error occurs with the xTokenize() implementation itself, it
8255
+ ** may abandon the tokenization and return any error code other than
8256
+ ** SQLITE_OK or SQLITE_DONE.
8257
+ **
8258
+ ** SYNONYM SUPPORT
8259
+ **
8260
+ ** Custom tokenizers may also support synonyms. Consider a case in which a
8261
+ ** user wishes to query for a phrase such as "first place". Using the
8262
+ ** built-in tokenizers, the FTS5 query 'first + place' will match instances
8263
+ ** of "first place" within the document set, but not alternative forms
8264
+ ** such as "1st place". In some applications, it would be better to match
8265
+ ** all instances of "first place" or "1st place" regardless of which form
8266
+ ** the user specified in the MATCH query text.
8267
+ **
8268
+ ** There are several ways to approach this in FTS5:
8269
+ **
8270
+ ** <ol><li> By mapping all synonyms to a single token.
8271
+ ** In the above example, this means that the tokenizer returns the
8272
+ ** same token for inputs "first" and "1st". Say that token is in
8273
+ ** fact "first", so that when the user inserts the document "I won
8274
+ ** 1st place" entries are added to the index for tokens "i", "won",
8275
+ ** "first" and "place". If the user then queries for '1st + place',
8276
+ ** the tokenizer substitutes "first" for "1st" and the query works
8277
+ ** as expected.
8278
+ **
8279
+ ** <li> By querying the index for all synonyms of each query term separately.
8280
+ ** In this case, when tokenizing query text, the tokenizer may
8281
+ ** provide multiple synonyms for a single term within the document.
8282
+ ** FTS5 then queries the index for each synonym individually. For
8283
+ ** example, faced with the query:
8284
+ **
8285
+ ** <codeblock>
8286
+ ** ... MATCH 'first place'</codeblock>
8287
+ **
8288
+ ** the tokenizer offers both "1st" and "first" as synonyms for the
8289
+ ** first token in the MATCH query and FTS5 effectively runs a query
8290
+ ** similar to:
8291
+ **
8292
+ ** <codeblock>
8293
+ ** ... MATCH '(first OR 1st) place'</codeblock>
8294
+ **
8295
+ ** except that, for the purposes of auxiliary functions, the query
8296
+ ** still appears to contain just two phrases - "(first OR 1st)"
8297
+ ** being treated as a single phrase.
8298
+ **
8299
+ ** <li> By adding multiple synonyms for a single term to the FTS index.
8300
+ ** Using this method, when tokenizing document text, the tokenizer
8301
+ ** provides multiple synonyms for each token. So that when a
8302
+ ** document such as "I won first place" is tokenized, entries are
8303
+ ** added to the FTS index for "i", "won", "first", "1st" and
8304
+ ** "place".
8305
+ **
8306
+ ** This way, even if the tokenizer does not provide synonyms
8307
+ ** when tokenizing query text (it should not - to do so would be
8308
+ ** inefficient), it doesn't matter if the user queries for
8309
+ ** 'first + place' or '1st + place', as there are entries in the
8310
+ ** FTS index corresponding to both forms of the first token.
8311
+ ** </ol>
8312
+ **
8313
+ ** Whether it is parsing document or query text, any call to xToken that
8314
+ ** specifies a <i>tflags</i> argument with the FTS5_TOKEN_COLOCATED bit
8315
+ ** is considered to supply a synonym for the previous token. For example,
8316
+ ** when parsing the document "I won first place", a tokenizer that supports
8317
+ ** synonyms would call xToken() 5 times, as follows:
8318
+ **
8319
+ ** <codeblock>
8320
+ ** xToken(pCtx, 0, "i", 1, 0, 1);
8321
+ ** xToken(pCtx, 0, "won", 3, 2, 5);
8322
+ ** xToken(pCtx, 0, "first", 5, 6, 11);
8323
+ ** xToken(pCtx, FTS5_TOKEN_COLOCATED, "1st", 3, 6, 11);
8324
+ ** xToken(pCtx, 0, "place", 5, 12, 17);
8325
+ **</codeblock>
8326
+ **
8327
+ ** It is an error to specify the FTS5_TOKEN_COLOCATED flag the first time
8328
+ ** xToken() is called. Multiple synonyms may be specified for a single token
8329
+ ** by making multiple calls to xToken(FTS5_TOKEN_COLOCATED) in sequence.
8330
+ ** There is no limit to the number of synonyms that may be provided for a
8331
+ ** single token.
8332
+ **
8333
+ ** In many cases, method (1) above is the best approach. It does not add
8334
+ ** extra data to the FTS index or require FTS5 to query for multiple terms,
8335
+ ** so it is efficient in terms of disk space and query speed. However, it
8336
+ ** does not support prefix queries very well. If, as suggested above, the
8337
+ ** token "first" is substituted for "1st" by the tokenizer, then the query:
8338
+ **
8339
+ ** <codeblock>
8340
+ ** ... MATCH '1s*'</codeblock>
8341
+ **
8342
+ ** will not match documents that contain the token "1st" (as the tokenizer
8343
+ ** will probably not map "1s" to any prefix of "first").
8344
+ **
8345
+ ** For full prefix support, method (3) may be preferred. In this case,
8346
+ ** because the index contains entries for both "first" and "1st", prefix
8347
+ ** queries such as 'fi*' or '1s*' will match correctly. However, because
8348
+ ** extra entries are added to the FTS index, this method uses more space
8349
+ ** within the database.
8350
+ **
8351
+ ** Method (2) offers a midpoint between (1) and (3). Using this method,
8352
+ ** a query such as '1s*' will match documents that contain the literal
8353
+ ** token "1st", but not "first" (assuming the tokenizer is not able to
8354
+ ** provide synonyms for prefixes). However, a non-prefix query like '1st'
8355
+ ** will match against "1st" and "first". This method does not require
8356
+ ** extra disk space, as no extra entries are added to the FTS index.
8357
+ ** On the other hand, it may require more CPU cycles to run MATCH queries,
8358
+ ** as separate queries of the FTS index are required for each synonym.
8359
+ **
8360
+ ** When using methods (2) or (3), it is important that the tokenizer only
8361
+ ** provide synonyms when tokenizing document text (method (3)) or query
8362
+ ** text (method (2)), not both. Doing so will not cause any errors, but is
8363
+ ** inefficient.
8364
+ */
8365
+ typedef struct Fts5Tokenizer Fts5Tokenizer;
8366
+ typedef struct fts5_tokenizer fts5_tokenizer;
8367
+ struct fts5_tokenizer {
8368
+ int (*xCreate)(void*, const char **azArg, int nArg, Fts5Tokenizer **ppOut);
8369
+ void (*xDelete)(Fts5Tokenizer*);
8370
+ int (*xTokenize)(Fts5Tokenizer*,
8371
+ void *pCtx,
8372
+ int flags, /* Mask of FTS5_TOKENIZE_* flags */
8373
+ const char *pText, int nText,
8374
+ int (*xToken)(
8375
+ void *pCtx, /* Copy of 2nd argument to xTokenize() */
8376
+ int tflags, /* Mask of FTS5_TOKEN_* flags */
8377
+ const char *pToken, /* Pointer to buffer containing token */
8378
+ int nToken, /* Size of token in bytes */
8379
+ int iStart, /* Byte offset of token within input text */
8380
+ int iEnd /* Byte offset of end of token within input text */
8381
+ )
8382
+ );
8383
+ };
8384
+
8385
+ /* Flags that may be passed as the third argument to xTokenize() */
8386
+ #define FTS5_TOKENIZE_QUERY 0x0001
8387
+ #define FTS5_TOKENIZE_PREFIX 0x0002
8388
+ #define FTS5_TOKENIZE_DOCUMENT 0x0004
8389
+ #define FTS5_TOKENIZE_AUX 0x0008
8390
+
8391
+ /* Flags that may be passed by the tokenizer implementation back to FTS5
8392
+ ** as the second argument to the supplied xToken callback. */
8393
+ #define FTS5_TOKEN_COLOCATED 0x0001 /* Same position as prev. token */
8394
+
8395
+ /*
8396
+ ** END OF CUSTOM TOKENIZERS
8397
+ *************************************************************************/
8398
+
8399
+ /*************************************************************************
8400
+ ** FTS5 EXTENSION REGISTRATION API
8401
+ */
8402
+ typedef struct fts5_api fts5_api;
8403
+ struct fts5_api {
8404
+ int iVersion; /* Currently always set to 2 */
8405
+
8406
+ /* Create a new tokenizer */
8407
+ int (*xCreateTokenizer)(
8408
+ fts5_api *pApi,
8409
+ const char *zName,
8410
+ void *pContext,
8411
+ fts5_tokenizer *pTokenizer,
8412
+ void (*xDestroy)(void*)
8413
+ );
8414
+
8415
+ /* Find an existing tokenizer */
8416
+ int (*xFindTokenizer)(
8417
+ fts5_api *pApi,
8418
+ const char *zName,
8419
+ void **ppContext,
8420
+ fts5_tokenizer *pTokenizer
8421
+ );
8422
+
8423
+ /* Create a new auxiliary function */
8424
+ int (*xCreateFunction)(
8425
+ fts5_api *pApi,
8426
+ const char *zName,
8427
+ void *pContext,
8428
+ fts5_extension_function xFunction,
8429
+ void (*xDestroy)(void*)
8430
+ );
8431
+ };
8432
+
8433
+ /*
8434
+ ** END OF REGISTRATION API
8435
+ *************************************************************************/
8436
+
8437
+ #ifdef __cplusplus
8438
+ } /* end of the 'extern "C"' block */
8439
+ #endif
8440
+
8441
+ #endif /* _FTS5_H */
8442
+
8443
+