opener-opinion-detector-basic 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +30 -0
  3. data/bin/opinion-detector-basic +19 -0
  4. data/bin/opinion-detector-basic-server +10 -0
  5. data/config.ru +4 -0
  6. data/core/opinion_detector_basic_multi.py +499 -0
  7. data/core/packages/KafNafParser-1.3.tar.gz +0 -0
  8. data/core/packages/VUA_pylib-1.4.tar.gz +0 -0
  9. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
  10. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
  11. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  12. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
  13. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
  14. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
  15. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
  16. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
  17. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
  18. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
  19. data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
  20. data/core/vendor/src/crfsuite/AUTHORS +1 -0
  21. data/core/vendor/src/crfsuite/COPYING +27 -0
  22. data/core/vendor/src/crfsuite/ChangeLog +103 -0
  23. data/core/vendor/src/crfsuite/INSTALL +236 -0
  24. data/core/vendor/src/crfsuite/Makefile.am +19 -0
  25. data/core/vendor/src/crfsuite/Makefile.in +783 -0
  26. data/core/vendor/src/crfsuite/README +183 -0
  27. data/core/vendor/src/crfsuite/aclocal.m4 +9018 -0
  28. data/core/vendor/src/crfsuite/autogen.sh +38 -0
  29. data/core/vendor/src/crfsuite/compile +143 -0
  30. data/core/vendor/src/crfsuite/config.guess +1502 -0
  31. data/core/vendor/src/crfsuite/config.h.in +198 -0
  32. data/core/vendor/src/crfsuite/config.sub +1714 -0
  33. data/core/vendor/src/crfsuite/configure +14273 -0
  34. data/core/vendor/src/crfsuite/configure.in +149 -0
  35. data/core/vendor/src/crfsuite/crfsuite.sln +42 -0
  36. data/core/vendor/src/crfsuite/depcomp +630 -0
  37. data/core/vendor/src/crfsuite/example/chunking.py +49 -0
  38. data/core/vendor/src/crfsuite/example/crfutils.py +179 -0
  39. data/core/vendor/src/crfsuite/example/ner.py +270 -0
  40. data/core/vendor/src/crfsuite/example/pos.py +78 -0
  41. data/core/vendor/src/crfsuite/example/template.py +88 -0
  42. data/core/vendor/src/crfsuite/frontend/Makefile.am +29 -0
  43. data/core/vendor/src/crfsuite/frontend/Makefile.in +640 -0
  44. data/core/vendor/src/crfsuite/frontend/dump.c +116 -0
  45. data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +129 -0
  46. data/core/vendor/src/crfsuite/frontend/iwa.c +273 -0
  47. data/core/vendor/src/crfsuite/frontend/iwa.h +65 -0
  48. data/core/vendor/src/crfsuite/frontend/learn.c +439 -0
  49. data/core/vendor/src/crfsuite/frontend/main.c +137 -0
  50. data/core/vendor/src/crfsuite/frontend/option.c +93 -0
  51. data/core/vendor/src/crfsuite/frontend/option.h +86 -0
  52. data/core/vendor/src/crfsuite/frontend/readdata.h +38 -0
  53. data/core/vendor/src/crfsuite/frontend/reader.c +136 -0
  54. data/core/vendor/src/crfsuite/frontend/tag.c +427 -0
  55. data/core/vendor/src/crfsuite/genbinary.sh.in +15 -0
  56. data/core/vendor/src/crfsuite/include/Makefile.am +11 -0
  57. data/core/vendor/src/crfsuite/include/Makefile.in +461 -0
  58. data/core/vendor/src/crfsuite/include/crfsuite.h +1063 -0
  59. data/core/vendor/src/crfsuite/include/crfsuite.hpp +555 -0
  60. data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +400 -0
  61. data/core/vendor/src/crfsuite/include/os.h +61 -0
  62. data/core/vendor/src/crfsuite/install-sh +520 -0
  63. data/core/vendor/src/crfsuite/lib/cqdb/COPYING +28 -0
  64. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +21 -0
  65. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +549 -0
  66. data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +86 -0
  67. data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +524 -0
  68. data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +587 -0
  69. data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +976 -0
  70. data/core/vendor/src/crfsuite/lib/crf/Makefile.am +46 -0
  71. data/core/vendor/src/crfsuite/lib/crf/Makefile.in +721 -0
  72. data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +216 -0
  73. data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +353 -0
  74. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +705 -0
  75. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +943 -0
  76. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +352 -0
  77. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +994 -0
  78. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +550 -0
  79. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +492 -0
  80. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +236 -0
  81. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +272 -0
  82. data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +106 -0
  83. data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +118 -0
  84. data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +80 -0
  85. data/core/vendor/src/crfsuite/lib/crf/src/logging.c +91 -0
  86. data/core/vendor/src/crfsuite/lib/crf/src/logging.h +48 -0
  87. data/core/vendor/src/crfsuite/lib/crf/src/params.c +335 -0
  88. data/core/vendor/src/crfsuite/lib/crf/src/params.h +80 -0
  89. data/core/vendor/src/crfsuite/lib/crf/src/quark.c +172 -0
  90. data/core/vendor/src/crfsuite/lib/crf/src/quark.h +46 -0
  91. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +1107 -0
  92. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +160 -0
  93. data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +408 -0
  94. data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +242 -0
  95. data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +507 -0
  96. data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +338 -0
  97. data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +435 -0
  98. data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +341 -0
  99. data/core/vendor/src/crfsuite/ltmain.sh +8413 -0
  100. data/core/vendor/src/crfsuite/missing +376 -0
  101. data/core/vendor/src/crfsuite/swig/Makefile.am +13 -0
  102. data/core/vendor/src/crfsuite/swig/Makefile.in +365 -0
  103. data/core/vendor/src/crfsuite/swig/crfsuite.cpp +2 -0
  104. data/core/vendor/src/crfsuite/swig/export.i +32 -0
  105. data/core/vendor/src/crfsuite/swig/python/README +92 -0
  106. data/core/vendor/src/crfsuite/swig/python/crfsuite.py +329 -0
  107. data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +14355 -0
  108. data/core/vendor/src/crfsuite/swig/python/export_wrap.h +63 -0
  109. data/core/vendor/src/crfsuite/swig/python/prepare.sh +9 -0
  110. data/core/vendor/src/crfsuite/swig/python/sample_tag.py +52 -0
  111. data/core/vendor/src/crfsuite/swig/python/sample_train.py +68 -0
  112. data/core/vendor/src/crfsuite/swig/python/setup.py +44 -0
  113. data/core/vendor/src/crfsuite/win32/stdint.h +679 -0
  114. data/core/vendor/src/liblbfgs/AUTHORS +1 -0
  115. data/core/vendor/src/liblbfgs/COPYING +22 -0
  116. data/core/vendor/src/liblbfgs/ChangeLog +120 -0
  117. data/core/vendor/src/liblbfgs/INSTALL +231 -0
  118. data/core/vendor/src/liblbfgs/Makefile.am +10 -0
  119. data/core/vendor/src/liblbfgs/Makefile.in +638 -0
  120. data/core/vendor/src/liblbfgs/NEWS +0 -0
  121. data/core/vendor/src/liblbfgs/README +71 -0
  122. data/core/vendor/src/liblbfgs/aclocal.m4 +6985 -0
  123. data/core/vendor/src/liblbfgs/autogen.sh +38 -0
  124. data/core/vendor/src/liblbfgs/config.guess +1411 -0
  125. data/core/vendor/src/liblbfgs/config.h.in +64 -0
  126. data/core/vendor/src/liblbfgs/config.sub +1500 -0
  127. data/core/vendor/src/liblbfgs/configure +21146 -0
  128. data/core/vendor/src/liblbfgs/configure.in +107 -0
  129. data/core/vendor/src/liblbfgs/depcomp +522 -0
  130. data/core/vendor/src/liblbfgs/include/lbfgs.h +745 -0
  131. data/core/vendor/src/liblbfgs/install-sh +322 -0
  132. data/core/vendor/src/liblbfgs/lbfgs.sln +26 -0
  133. data/core/vendor/src/liblbfgs/lib/Makefile.am +24 -0
  134. data/core/vendor/src/liblbfgs/lib/Makefile.in +499 -0
  135. data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +133 -0
  136. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +294 -0
  137. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +298 -0
  138. data/core/vendor/src/liblbfgs/lib/lbfgs.c +1371 -0
  139. data/core/vendor/src/liblbfgs/lib/lib.vcxproj +95 -0
  140. data/core/vendor/src/liblbfgs/ltmain.sh +6426 -0
  141. data/core/vendor/src/liblbfgs/missing +353 -0
  142. data/core/vendor/src/liblbfgs/sample/Makefile.am +15 -0
  143. data/core/vendor/src/liblbfgs/sample/Makefile.in +433 -0
  144. data/core/vendor/src/liblbfgs/sample/sample.c +81 -0
  145. data/core/vendor/src/liblbfgs/sample/sample.cpp +126 -0
  146. data/core/vendor/src/liblbfgs/sample/sample.vcxproj +105 -0
  147. data/core/vendor/src/svm_light/LICENSE.txt +59 -0
  148. data/core/vendor/src/svm_light/Makefile +105 -0
  149. data/core/vendor/src/svm_light/kernel.h +40 -0
  150. data/core/vendor/src/svm_light/svm_classify.c +197 -0
  151. data/core/vendor/src/svm_light/svm_common.c +985 -0
  152. data/core/vendor/src/svm_light/svm_common.h +301 -0
  153. data/core/vendor/src/svm_light/svm_hideo.c +1062 -0
  154. data/core/vendor/src/svm_light/svm_learn.c +4147 -0
  155. data/core/vendor/src/svm_light/svm_learn.h +169 -0
  156. data/core/vendor/src/svm_light/svm_learn_main.c +397 -0
  157. data/core/vendor/src/svm_light/svm_loqo.c +211 -0
  158. data/ext/hack/Rakefile +17 -0
  159. data/ext/hack/support.rb +88 -0
  160. data/lib/opener/opinion_detector_basic.rb +91 -0
  161. data/lib/opener/opinion_detector_basic/public/markdown.css +284 -0
  162. data/lib/opener/opinion_detector_basic/server.rb +16 -0
  163. data/lib/opener/opinion_detector_basic/version.rb +5 -0
  164. data/lib/opener/opinion_detector_basic/views/index.erb +97 -0
  165. data/lib/opener/opinion_detector_basic/views/result.erb +15 -0
  166. data/opener-opinion-detector-basic.gemspec +36 -0
  167. data/pre_build_requirements.txt +1 -0
  168. metadata +309 -0
@@ -0,0 +1,86 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3
+ <ItemGroup Label="ProjectConfigurations">
4
+ <ProjectConfiguration Include="Debug|Win32">
5
+ <Configuration>Debug</Configuration>
6
+ <Platform>Win32</Platform>
7
+ </ProjectConfiguration>
8
+ <ProjectConfiguration Include="Release|Win32">
9
+ <Configuration>Release</Configuration>
10
+ <Platform>Win32</Platform>
11
+ </ProjectConfiguration>
12
+ </ItemGroup>
13
+ <PropertyGroup Label="Globals">
14
+ <ProjectGuid>{46A23DE6-7E34-4429-8F15-FCC3C083FC5B}</ProjectGuid>
15
+ <RootNamespace>cqdb</RootNamespace>
16
+ <Keyword>Win32Proj</Keyword>
17
+ </PropertyGroup>
18
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
19
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
20
+ <ConfigurationType>StaticLibrary</ConfigurationType>
21
+ <CharacterSet>Unicode</CharacterSet>
22
+ <WholeProgramOptimization>true</WholeProgramOptimization>
23
+ </PropertyGroup>
24
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
25
+ <ConfigurationType>StaticLibrary</ConfigurationType>
26
+ <CharacterSet>Unicode</CharacterSet>
27
+ </PropertyGroup>
28
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
29
+ <ImportGroup Label="ExtensionSettings">
30
+ </ImportGroup>
31
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
32
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
33
+ </ImportGroup>
34
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
35
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
36
+ </ImportGroup>
37
+ <PropertyGroup Label="UserMacros" />
38
+ <PropertyGroup>
39
+ <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
40
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
41
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
42
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
43
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
44
+ <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
45
+ <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
46
+ <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" />
47
+ <CodeAnalysisRuleSet Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">AllRules.ruleset</CodeAnalysisRuleSet>
48
+ <CodeAnalysisRules Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
49
+ <CodeAnalysisRuleAssemblies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" />
50
+ </PropertyGroup>
51
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
52
+ <ClCompile>
53
+ <Optimization>Disabled</Optimization>
54
+ <AdditionalIncludeDirectories>$(SolutionDir)include;$(SolutionDir)win32;include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
55
+ <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
56
+ <MinimalRebuild>true</MinimalRebuild>
57
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
58
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
59
+ <PrecompiledHeader>
60
+ </PrecompiledHeader>
61
+ <WarningLevel>Level3</WarningLevel>
62
+ <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
63
+ </ClCompile>
64
+ </ItemDefinitionGroup>
65
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
66
+ <ClCompile>
67
+ <AdditionalIncludeDirectories>$(SolutionDir)include;$(SolutionDir)win32;include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
68
+ <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
69
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
70
+ <PrecompiledHeader>
71
+ </PrecompiledHeader>
72
+ <WarningLevel>Level3</WarningLevel>
73
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
74
+ </ClCompile>
75
+ </ItemDefinitionGroup>
76
+ <ItemGroup>
77
+ <ClCompile Include="src\cqdb.c" />
78
+ <ClCompile Include="src\lookup3.c" />
79
+ </ItemGroup>
80
+ <ItemGroup>
81
+ <ClInclude Include="include\cqdb.h" />
82
+ </ItemGroup>
83
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
84
+ <ImportGroup Label="ExtensionTargets">
85
+ </ImportGroup>
86
+ </Project>
@@ -0,0 +1,524 @@
1
+ /*
2
+ * Constant Quark Database (CQDB).
3
+ *
4
+ * Copyright (c) 2007, Naoaki Okazaki
5
+ * All rights reserved.
6
+ *
7
+ * Redistribution and use in source and binary forms, with or without
8
+ * modification, are permitted provided that the following conditions are met:
9
+ * * Redistributions of source code must retain the above copyright
10
+ * notice, this list of conditions and the following disclaimer.
11
+ * * Redistributions in binary form must reproduce the above copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ * * Neither the name of the Northwestern University, University of Tokyo,
15
+ * nor the names of its contributors may be used to endorse or promote
16
+ * products derived from this software without specific prior written
17
+ * permission.
18
+ *
19
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
23
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
+ */
31
+
32
+ /* $Id$ */
33
+
34
+ #ifndef __CQDB_H__
35
+ #define __CQDB_H__
36
+
37
+ /** @file */
38
+
39
+
40
+
41
+ /**
42
+ * \addtogroup cqdb_const CQDB Constants
43
+ * @{
44
+ *
45
+ * The CQDB Constants.
46
+ */
47
+
48
+ /**
49
+ * CQDB flags.
50
+ */
51
+ enum {
52
+ CQDB_NONE = 0, /**< No flag. */
53
+ CQDB_ONEWAY = 0x00000001, /**< A reverse lookup array is omitted. */
54
+ CQDB_ERROR_OCCURRED = 0x00010000, /**< An error has occurred. */
55
+ };
56
+
57
+ /**
58
+ * CQDB status codes.
59
+ */
60
+ enum {
61
+ CQDB_SUCCESS = 0, /**< Success. */
62
+ CQDB_ERROR = -1024, /**< Unspecified error. */
63
+ CQDB_ERROR_NOTFOUND, /**< String not found. */
64
+ CQDB_ERROR_OUTOFMEMORY, /**< Insufficient memory. */
65
+ CQDB_ERROR_FILEWRITE, /**< Error in fwrite() operations. */
66
+ CQDB_ERROR_FILETELL, /**< Error in ftell() operations. */
67
+ CQDB_ERROR_FILESEEK, /**< Error in fseek() operations. */
68
+ CQDB_ERROR_INVALIDID, /**< Invalid parameters. */
69
+ };
70
+
71
+ /** @} */
72
+
73
+
74
+
75
+ /**
76
+ * \addtogroup cqdb_writer CQDB Writer API
77
+ * @{
78
+ *
79
+ * The CQDB Writer API constructs a CQDB chunk on a seekable stream. The
80
+ * seekable stream must be created by the fopen() function with writable and
81
+ * binary flags ("wb"). The CQDB Writer API can build a CQDB chunk at any
82
+ * position on the stream; one can thus write some data, append a CQDB chunk,
83
+ * and continue writing other data on the stream.
84
+ *
85
+ * By default, the function cqdb_writer() constructs a database with forward
86
+ * (string to integer identifier) and backward (integer identifier to string)
87
+ * lookups. The data for reverse lookup is omitted with ::CQDB_ONEWAY flag
88
+ * specified.
89
+ *
90
+ * It is recommended to keep the maximum number of identifiers as small as
91
+ * possible because reverse lookup is maintained by an array with the size of
92
+ * sizeof(int) * (maximum number of identifiers + 1). For example, putting a
93
+ * set of integer identifiers (0, 1, 1000) creates a reverse lookup array with
94
+ * 1001 elements only to waste the disk space for 998 (= 1001-3) elements in
95
+ * the array.
96
+ */
97
+
98
+ struct tag_cqdb_writer;
99
+ typedef struct tag_cqdb_writer cqdb_writer_t; /**< Typedef of a CQDB writer. */
100
+
101
+ /**
102
+ * Create a new CQDB writer on a seekable stream.
103
+ *
104
+ * This function initializes a database on the seekable stream and returns
105
+ * the pointer to a ::cqdb_writer_t instance to write the database.
106
+ * The stream must have the writable and binary flags. The database creation
107
+ * flag must be zero except when the reverse lookup array is unnecessary;
108
+ * specifying ::CQDB_ONEWAY flag will save the storage space for the reverse
109
+ * lookup array. After calling this function, one should avoid accessing the
110
+ * seekable stream directly until calling cqdb_writer_close().
111
+ *
112
+ * @param fp The pointer to the writable and seekable stream.
113
+ * @param flag Database creation flag.
114
+ * @retval cqdb_writer_t* The pointer to the new ::cqdb_writer_t instance if
115
+ * successful; otherwise \c NULL.
116
+ */
117
+ cqdb_writer_t* cqdb_writer(FILE *fp, int flag);
118
+
119
+ /**
120
+ * Put a string/identifier association to the database.
121
+ *
122
+ * This function appends a string/identifier association into the database.
123
+ * Make sure that the string and/or identifier have never been inserted to
124
+ * the database and that the identifier is a non-negative value.
125
+ *
126
+ * @param dbw The pointer to the ::cqdb_writer_t instance.
127
+ * @param str The pointer to the string.
128
+ * @param id The identifier.
129
+ * @retval int Zero if successful, or a status code otherwise.
130
+ */
131
+ int cqdb_writer_put(cqdb_writer_t* dbw, const char *str, int id);
132
+
133
+ /**
134
+ * Close a CQDB writer.
135
+ *
136
+ * This function finalizes the database on the stream. If successful, the
137
+ * data remaining on the memory is flushed to the stream; the stream position
138
+ * is moved to the end of the chunk. If an unexpected error occurs, this
139
+ * function tries to rewind the stream position to the original position when
140
+ * the function cqdb_writer() was called.
141
+ *
142
+ * @param dbw The pointer to the ::cqdb_writer_t instance.
143
+ * @retval int Zero if successful, or a status code otherwise.
144
+ */
145
+ int cqdb_writer_close(cqdb_writer_t* dbw);
146
+
147
+ /** @} */
148
+
149
+
150
+
151
+ /**
152
+ * \addtogroup cqdb_reader CQDB Reader API
153
+ * @{
154
+ *
155
+ * The CQDB reader API provides a read access to the database whose memory
156
+ * image is loaded on a memory block. The memory-passing interface has
157
+ * several advantages. Firstly, one can choose an efficient way for their
158
+ * application to load a database image to a memory block, e.g., to read
159
+ * the whole image from a file, to use the Memory Mapped File (mmap) API,
160
+ * etc.
161
+ * Secondly, one can design the file format freely only if the memory
162
+ * block for a database is extracted from the file.
163
+ *
164
+ * The most fundamental operation on the CQDB reader API is forward lookup
165
+ * through the use of cqdb_to_id() function, which retrieves integer
166
+ * identifiers from strings. Reverse lookup (retrieving strings from integer
167
+ * identifiers) with cqdb_to_string() function is not supported if the
168
+ * database has been created with ::CQDB_ONEWAY flag.
169
+ */
170
+
171
+ struct tag_cqdb;
172
+ typedef struct tag_cqdb cqdb_t; /**< Typedef of a CQDB reader. */
173
+
174
+ /**
175
+ * Open a new CQDB reader on a memory block.
176
+ *
177
+ * This function initializes a database on a memory block and returns the
178
+ * pointer to a ::cqdb_t instance to access the database.
179
+ *
180
+ * @param buffer The pointer to the memory block.
181
+ * @param size The size of the memory block.
182
+ * @retval cqdb_t* The pointer to the ::cqdb_t instance.
183
+ */
184
+ cqdb_t* cqdb_reader(void *buffer, size_t size);
185
+
186
+ /**
187
+ * Delete the CQDB reader.
188
+ *
189
+ * This function frees the work area allocated by cqdb_reader() function.
190
+ *
191
+ * @param db The pointer to the ::cqdb_t instance.
192
+ */
193
+ void cqdb_delete(cqdb_t* db);
194
+
195
+ /**
196
+ * Retrieve the identifier associated with a string.
197
+ *
198
+ * This function returns the identifier associated with a string.
199
+ *
200
+ * @param db The pointer to the ::cqdb_t instance.
201
+ * @param str The pointer to a string.
202
+ * @retval int The non-negative identifier if successful, negative
203
+ * status code otherwise.
204
+ */
205
+ int cqdb_to_id(cqdb_t* db, const char *str);
206
+
207
+ /**
208
+ * Retrieve the string associated with an identifier.
209
+ *
210
+ * This function returns the string associated with an identifier.
211
+ *
212
+ * @param db The pointer to the cqdb_t instance.
213
+ * @param id The id.
214
+ * @retval const char* The pointer to the string associated with the
215
+ * identifier if successful; otherwise \c NULL.
216
+ */
217
+ const char* cqdb_to_string(cqdb_t* db, int id);
218
+
219
+ /**
220
+ * Get the number of associations in the database.
221
+ *
222
+ * This function returns the number of associations in the database.
223
+ *
224
+ * @param db The pointer to the ::cqdb_t instance.
225
+ * @retval int The number of string/identifier associations.
226
+ */
227
+ int cqdb_num(cqdb_t* db);
228
+
229
+ /** @} */
230
+
231
+
232
+
233
+ /**
234
+ @mainpage Constant Quark Database (CQDB)
235
+
236
+ @section intro Introduction
237
+
238
+ It is a common technique for speed and memory optimizations that an
239
+ application converts all string values into integer identifiers, does some
240
+ processing with integer values, and then restores the original string values
241
+ (if necessary). The data structure for two-way associations between strings
242
+ and integer identifiers is known as Quark:
243
+ - GQuark in GLib: http://www.gtk.org/
244
+ - quark (C++): http://www.chokkan.org/software/sample/quark.h
245
+
246
+ Constant Quark Database (CQDB) is a database library specialized for
247
+ serialization and retrieval of <i>static</i> associations between strings and
248
+ integer identifiers. The database provides several features:
249
+ - <b>Fast look-ups.</b> Retrieving an integer identifier for a string is
250
+ usually done by accessing three memory blocks. Retrieving a string for an
251
+ integer identifier is always done by accessing two memory blocks.
252
+ See the @ref performance "performance" evaluation.
253
+ - <b>Low overhead.</b> A CQDB database consists of a chunk header (24 bytes),
254
+ hash tables (2048 bytes and 16 bytes per record), a reverse lookup array
255
+ (4 bytes per integer identifier), and records (8 bytes + string size per
256
+ record).
257
+ See the @ref performance "performance" evaluation.
258
+ - <b>Sophisticated hash function.</b> CQDB incorporates the fast and
259
+ collision-resistant hash function for strings
260
+ (<a href="http://www.burtleburtle.net/bob/c/lookup3.c">lookup3.c</a>)
261
+ implemented by Bob Jenkins.
262
+ - <b>Chunk format.</b> The structure of CQDB is designed to store the data in
263
+ a chunk of a file; CQDB can be embedded into a file with other arbitrary
264
+ data.
265
+ - <b>Omissible reverse look-up array.</b> The reverse look-up array can be
266
+ omitted if it is not necessary to retrieve strings from integer identifiers.
267
+ - <b>Cross platform.</b> The source code can be compiled on Microsoft Visual
268
+ Studio 2005, GNU C Compiler (gcc), etc.
269
+ - <b>Simple API.</b> The CQDB API exposes only a few functions.
270
+
271
+ CQDB is suitable for implementing dictionaries in which fast look-ups of
272
+ strings and identifiers are essential while a dictionary update rarely occurs.
273
+ The data structure is a specialization (and extension) of the
274
+ <a href="http://cr.yp.to/cdb.html">Constant Database</a> proposed by
275
+ Daniel J. Bernstein.
276
+
277
+ CQDB does not support assigning a unique integer identifier for a given string,
278
+ modifying associations, or checking collisions in strings and identifiers; thus,
279
+ it may be necessary to use an existing Quark implementation to manage proper
280
+ associations between strings and identifiers on memory.
281
+
282
+ This library is used by the
283
+ <a href="http://www.chokkan.org/software/crfsuite/">CRFsuite</a> project.
284
+
285
+ @section download Download
286
+
287
+ - <a href="http://www.chokkan.org/software/dist/cqdb-1.1.tar.gz">Source code</a>
288
+
289
+ CQDB is distributed under the terms of the
290
+ <a href="http://www.opensource.org/licenses/bsd-license.php">modified BSD license</a>.
291
+
292
+ @section changelog History
293
+ - Version 1.1 (2007-12-01):
294
+ - Fixed a bug when a CQDB chunk is embedded to a file.
295
+
296
+ - Version 1.0 (2007-09-20):
297
+ - Initial release.
298
+
299
+ @section api Documentation
300
+
301
+ - @ref cqdb_const "CQDB Constants"
302
+ - @ref cqdb_reader "CQDB Reader API"
303
+ - @ref cqdb_writer "CQDB Writer API"
304
+
305
+ @section sample Sample programs
306
+ @subsection sample_writer A writer sample
307
+
308
+ This sample code constructs a database "test.cqdb" with 1,000,000
309
+ string/identifier associations,
310
+ "00000000"/0, "00000001"/1, ..., "00999999"/999999.
311
+
312
+ @code
313
+
314
+ #include <stdio.h>
315
+ #include <stdlib.h>
316
+ #include <string.h>
317
+ #include "cqdb.h"
318
+
319
+ #define DBNAME "test.cqdb"
320
+ #define NUMELEMS 1000000
321
+
322
+ int main(int argc, char *argv[])
323
+ {
324
+ int i, ret;
325
+ char str[10];
326
+ FILE *fp = NULL;
327
+ cqdb_writer_t* dbw = NULL;
328
+
329
+ // Open a file for writing.
330
+ fp = fopen(DBNAME, "wb");
331
+ if (fp == NULL) {
332
+ fprintf(stderr, "ERROR: failed to open the file.\n");
333
+ return 1;
334
+ }
335
+
336
+ // Create a CQDB on the file stream.
337
+ dbw = cqdb_writer(fp, 0);
338
+ if (dbw == NULL) {
339
+ fprintf(stderr, "ERROR: failed to create a CQDB on the file.\n");
340
+ goto error_exit;
341
+ }
342
+
343
+ // Put string/integer associations, "00000000"/0, ..., "00999999"/999999.
344
+ for (i = 0;i < NUMELEMS;++i) {
345
+ sprintf(str, "%08d", i);
346
+ if (ret = cqdb_writer_put(dbw, str, i)) {
347
+ fprintf(stderr, "ERROR: failed to put a pair '%s'/%d.\n", str, i);
348
+ goto error_exit;
349
+ }
350
+ }
351
+
352
+ // Close the CQDB.
353
+ if (ret = cqdb_writer_close(dbw)) {
354
+ fprintf(stderr, "ERROR: failed to close the CQDB.\n");
355
+ goto error_exit;
356
+ }
357
+
358
+ // Close the file.
359
+ fclose(fp);
360
+ return 0;
361
+
362
+ error_exit:
363
+ if (dbw != NULL) cqdb_writer_close(dbw);
364
+ if (fp != NULL) fclose(fp);
365
+ return 1;
366
+ }
367
+
368
+ @endcode
369
+
370
+ @subsection sample_reader A reader sample
371
+
372
+ This sample code issues string queries "00000000", ..., "00999999" to retrieve
373
+ integer identifiers (forward lookups) and integer queries 0, ..., 999999 to
374
+ retrieve the strings "00000000", ..., "00999999".
375
+
376
+ @code
377
+
378
+ #include <stdio.h>
379
+ #include <stdlib.h>
380
+ #include <string.h>
381
+ #include "cqdb.h"
382
+
383
+ #define DBNAME "test.cqdb"
384
+ #define NUMELEMS 1000000
385
+
386
+ int main(int argc, char *argv[])
387
+ {
388
+ int i, j, ret;
389
+ long size = 0;
390
+ char str[10], *value = NULL, *buffer = NULL;
391
+ FILE *fp = NULL;
392
+ cqdb_t* db = NULL;
393
+
394
+ // Open the database.
395
+ fp = fopen(DBNAME, "rb");
396
+ if (fp == NULL) {
397
+ fprintf(stderr, "ERROR: failed to open the file\n");
398
+ return 1;
399
+ }
400
+
401
+ // Obtain the file size.
402
+ fseek(fp, 0, SEEK_END);
403
+ size = ftell(fp);
404
+ fseek(fp, 0, SEEK_SET);
405
+
406
+ // Read the whole content of the file at once.
407
+ buffer = (char *)malloc(size);
408
+ if (buffer == NULL) {
409
+ fprintf(stderr, "ERROR: out of memory.\n");
410
+ goto error_exit;
411
+ }
412
+ fread(buffer, 1, size, fp);
413
+ fclose(fp);
414
+ fp = NULL;
415
+
416
+ // Open the database on the memory.
417
+ db = cqdb_reader(buffer, size);
418
+ if (db == NULL) {
419
+ fprintf(stderr, "ERROR: failed to open a CQDB on the file.\n");
420
+ goto error_exit;
421
+ }
422
+
423
+ // Forward lookups: strings to integer identifiers.
424
+ for (i = 0;i < NUMELEMS;++i) {
425
+ sprintf(str, "%08d", i);
426
+ j = cqdb_to_id(db, str);
427
+ // Validity check.
428
+ if (j < 0 || i != j) {
429
+ fprintf(stderr, "ERROR: inconsistency error '%s'/%d.\n", str, i);
430
+ goto error_exit;
431
+ }
432
+ }
433
+
434
+ // Reverse lookups: integer identifiers to strings.
435
+ for (i = 0;i < NUMELEMS;++i) {
436
+ sprintf(str, "%08d", i);
437
+ value = cqdb_to_string(db, i);
438
+ // Validity check.
439
+ if (value == NULL || strcmp(str, value) != 0) {
440
+ fprintf(stderr, "ERROR: inconsistency error '%s'/%d.\n", str, i);
441
+ goto error_exit;
442
+ }
443
+ }
444
+
445
+ // Delete the instance of the CQDB.
446
+ cqdb_delete(db);
447
+ free(buffer);
448
+
449
+ return 0;
450
+
451
+ error_exit:
452
+ if (fp != NULL) fclose(fp);
453
+ if (buffer != NULL) free(buffer);
454
+ return 1;
455
+ }
456
+
457
+ @endcode
458
+
459
+ @section performance Performance
460
+
461
+ An experiment for performance comparison with
462
+ <a href="http://www.oracle.com/database/berkeley-db/">Berkeley DB (BDB) 4.5.20</a>
463
+ and <a href="http://qdbm.sourceforge.net/">Quick Database Manager (QDBM) 1.8.75</a>
464
+ was conducted.
465
+ Constructing a database with 1,000,000 string/identifier associations,
466
+ "00000000"/0, "00000001"/1, ..., "01000000"/1000000, this experiment issued
467
+ string queries "00000000", ..., "01000000" (forward lookups) and integer
468
+ queries 0, ..., 1000000 (reverse lookups) to the database. Since BDB and
469
+ QDBM do not support reverse lookups, reverse items (key: identifier,
470
+ value: string) were inserted to the database in addition to the forward items
471
+ (key: string, value: integer). Microsoft Windows Vista Business was running on
472
+ the test environment (Intel Core2Duo 6600 (2.40GHz) processor, Intel G965
473
+ chipset, 2GB main memory, and Seagate ST3320620 HDD).
474
+ The test code was compiled with Microsoft Visual Studio 2005.
475
+
476
+ This table shows the elapsed time for constructing the database (write time),
477
+ the elapsed time for processing with the queries (read time), and the size of
478
+ the database generated by each database library.
479
+ The read/write access of CQDB was far faster than that of the other database
480
+ libraries. The database was smaller than half the size of those generated by
481
+ other libraries.
482
+ These results suggest that the CQDB has a substantial advantage over the
483
+ existing database libraries for implementing a static quark database.
484
+
485
+ <table>
486
+ <tr>
487
+ <th>Database</th><th>Parameters</th><th>Write time [sec]</th><th>Read time [sec]</th><th>Database size [MB]</th>
488
+ </tr>
489
+ <tr align="right">
490
+ <td align="left">Constant Quark Database (CQDB) 1.0</td>
491
+ <td align="left">Default (none)</td>
492
+ <td><b>1.48</b></td><td><b>0.65</b></td><td><b>35.2</b></td>
493
+ </tr>
494
+ <tr align="right">
495
+ <td align="left">Berkeley DB (BDB) 4.5.20</td>
496
+ <td align="left">Default</td>
497
+ <td>91.8</td><td>37.5</td><td>79.7</td>
498
+ </tr>
499
+ <tr align="right">
500
+ <td align="left">Berkeley DB (BDB) 4.5.20</td>
501
+ <td align="left">table_size=4000000; cache_size=200MB</td>
502
+ <td>57.8</td><td>37.5</td><td>79.7</td>
503
+ </tr>
504
+ <tr align="right">
505
+ <td align="left">Quick Database Manager (QDBM) 1.8.75</td>
506
+ <td align="left">Default</td>
507
+ <td>95.4</td><td>80.6</td><td>76.3</td>
508
+ </tr>
509
+ <tr align="right">
510
+ <td align="left">Quick Database Manager (QDBM) 1.8.75</td>
511
+ <td align="left">table_size=4000000</td>
512
+ <td>15.7</td><td>12.0</td><td>92.2</td>
513
+ </tr>
514
+ </table>
515
+
516
+ @section reference Reference
517
+ - <a href="http://cr.yp.to/cdb.html">cdb</a> by Daniel J. Bernstein.
518
+ - <a href="http://www.corpit.ru/mjt/tinycdb.html">TinyCDB</a> by Michael Tokarev.
519
+ - <a href="http://www.unixuser.org/~euske/doc/cdbinternals/index.html">Constant Database (cdb) Internals</a> by Yusuke Shinyama.
520
+ - <a href="http://www.burtleburtle.net/bob/hash/index.html">Hash Functions and Block Ciphers</a> by Bob Jenkins.
521
+
522
+ */
523
+
524
+ #endif/*__CQDB_H__*/