esruby 0.0.0 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (385) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +6 -6
  3. data/bin/esruby +9 -0
  4. data/lib/esruby.rb +8 -0
  5. data/resources/mruby/build_config.rb +0 -1
  6. data/resources/mruby/mrbgems/mruby-print/mrblib/print.rb +1 -1
  7. data/resources/project_template/app/app.rb +2 -0
  8. data/resources/project_template/config.rb +35 -0
  9. data/resources/project_template/www/index.html +17 -0
  10. metadata +6 -377
  11. data/resources/mruby/bin/mirb +0 -0
  12. data/resources/mruby/bin/mrbc +0 -0
  13. data/resources/mruby/bin/mruby +0 -0
  14. data/resources/mruby/bin/mruby-strip +0 -0
  15. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/README.md +0 -82
  16. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/mrbgem.rake +0 -63
  17. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/mrblib/regexp_pcre.rb +0 -232
  18. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/mrblib/string_pcre.rb +0 -333
  19. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/132html +0 -313
  20. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/AUTHORS +0 -45
  21. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/CMakeLists.txt +0 -959
  22. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/COPYING +0 -5
  23. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/ChangeLog +0 -4981
  24. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/CheckMan +0 -67
  25. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/CleanTxt +0 -113
  26. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/Detrail +0 -35
  27. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/HACKING +0 -473
  28. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/INSTALL +0 -370
  29. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/LICENCE +0 -92
  30. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/Makefile.am +0 -877
  31. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/Makefile.in +0 -2917
  32. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/NEWS +0 -611
  33. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/NON-AUTOTOOLS-BUILD +0 -639
  34. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/NON-UNIX-USE +0 -7
  35. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/PrepareRelease +0 -253
  36. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/README +0 -935
  37. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/RunGrepTest +0 -551
  38. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/RunTest +0 -1015
  39. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/RunTest.bat +0 -616
  40. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/aclocal.m4 +0 -1230
  41. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/autom4te.cache/output.0 +0 -21280
  42. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/autom4te.cache/output.1 +0 -21280
  43. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/autom4te.cache/requests +0 -273
  44. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/autom4te.cache/traces.0 +0 -2421
  45. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/autom4te.cache/traces.1 +0 -1144
  46. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/cmake/COPYING-CMAKE-SCRIPTS +0 -22
  47. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/cmake/FindEditline.cmake +0 -17
  48. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/cmake/FindPackageHandleStandardArgs.cmake +0 -58
  49. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/cmake/FindReadline.cmake +0 -29
  50. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/compile +0 -343
  51. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/config-cmake.h.in +0 -54
  52. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/config.guess +0 -1552
  53. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/config.h +0 -392
  54. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/config.h.generic +0 -392
  55. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/config.h.in +0 -343
  56. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/config.sub +0 -1804
  57. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/configure +0 -21280
  58. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/configure.ac +0 -1082
  59. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/depcomp +0 -708
  60. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/dftables.c +0 -212
  61. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/index.html +0 -180
  62. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre-config.html +0 -109
  63. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre.html +0 -204
  64. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre16.html +0 -383
  65. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_assign_jit_stack.html +0 -76
  66. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_compile.html +0 -108
  67. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_compile2.html +0 -112
  68. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_config.html +0 -91
  69. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_copy_named_substring.html +0 -65
  70. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_copy_substring.html +0 -61
  71. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_dfa_exec.html +0 -128
  72. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_exec.html +0 -110
  73. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_free_study.html +0 -46
  74. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_free_substring.html +0 -46
  75. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_free_substring_list.html +0 -46
  76. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_fullinfo.html +0 -108
  77. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_get_named_substring.html +0 -68
  78. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_get_stringnumber.html +0 -57
  79. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_get_stringtable_entries.html +0 -60
  80. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_get_substring.html +0 -64
  81. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_get_substring_list.html +0 -61
  82. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_jit_exec.html +0 -108
  83. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_jit_stack_alloc.html +0 -55
  84. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_jit_stack_free.html +0 -48
  85. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_maketables.html +0 -48
  86. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_pattern_to_host_byte_order.html +0 -58
  87. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_refcount.html +0 -51
  88. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_study.html +0 -68
  89. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_utf16_to_host_byte_order.html +0 -57
  90. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcre_version.html +0 -46
  91. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcreapi.html +0 -2786
  92. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcrebuild.html +0 -517
  93. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcrecallout.html +0 -243
  94. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcrecompat.html +0 -216
  95. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcrecpp.html +0 -368
  96. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcredemo.html +0 -426
  97. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcregrep.html +0 -757
  98. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcrejit.html +0 -458
  99. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcrelimits.html +0 -86
  100. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcrematching.html +0 -233
  101. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcrepartial.html +0 -474
  102. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcrepattern.html +0 -2953
  103. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcreperform.html +0 -195
  104. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcreposix.html +0 -292
  105. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcreprecompile.html +0 -158
  106. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcresample.html +0 -110
  107. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcrestack.html +0 -225
  108. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcresyntax.html +0 -521
  109. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcretest.html +0 -1082
  110. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/html/pcreunicode.html +0 -270
  111. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/index.html.src +0 -180
  112. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre-config.1 +0 -92
  113. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre-config.txt +0 -86
  114. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre.3 +0 -202
  115. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre.txt +0 -9909
  116. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre16.3 +0 -390
  117. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre32.3 +0 -389
  118. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_assign_jit_stack.3 +0 -61
  119. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_compile.3 +0 -98
  120. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_compile2.3 +0 -106
  121. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_config.3 +0 -76
  122. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_copy_named_substring.3 +0 -59
  123. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_copy_substring.3 +0 -52
  124. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_dfa_exec.3 +0 -125
  125. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_exec.3 +0 -103
  126. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_free_study.3 +0 -31
  127. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_free_substring.3 +0 -31
  128. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_free_substring_list.3 +0 -31
  129. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_fullinfo.3 +0 -95
  130. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_get_named_substring.3 +0 -62
  131. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_get_stringnumber.3 +0 -45
  132. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_get_stringtable_entries.3 +0 -48
  133. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_get_substring.3 +0 -55
  134. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_get_substring_list.3 +0 -49
  135. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_jit_exec.3 +0 -104
  136. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_jit_stack_alloc.3 +0 -45
  137. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_jit_stack_free.3 +0 -35
  138. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_maketables.3 +0 -33
  139. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_pattern_to_host_byte_order.3 +0 -46
  140. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_refcount.3 +0 -36
  141. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_study.3 +0 -56
  142. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_utf16_to_host_byte_order.3 +0 -46
  143. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_utf32_to_host_byte_order.3 +0 -46
  144. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcre_version.3 +0 -31
  145. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcreapi.3 +0 -2823
  146. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcrebuild.3 +0 -520
  147. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcrecallout.3 +0 -214
  148. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcrecompat.3 +0 -185
  149. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcrecpp.3 +0 -348
  150. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcregrep.1 +0 -679
  151. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcregrep.txt +0 -740
  152. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcrejit.3 +0 -437
  153. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcrelimits.3 +0 -67
  154. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcrematching.3 +0 -206
  155. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcrepartial.3 +0 -445
  156. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcrepattern.3 +0 -2983
  157. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcreperform.3 +0 -177
  158. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcreposix.3 +0 -270
  159. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcreprecompile.3 +0 -151
  160. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcresample.3 +0 -99
  161. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcrestack.3 +0 -215
  162. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcresyntax.3 +0 -496
  163. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcretest.1 +0 -1079
  164. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcretest.txt +0 -1012
  165. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/pcreunicode.3 +0 -255
  166. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/doc/perltest.txt +0 -42
  167. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/install-sh +0 -527
  168. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/libpcre.pc.in +0 -12
  169. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/libpcre16.pc.in +0 -12
  170. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/libpcre32.pc.in +0 -12
  171. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/libpcrecpp.pc.in +0 -12
  172. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/libpcreposix.pc.in +0 -13
  173. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/ltmain.sh +0 -9636
  174. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/m4/ax_pthread.m4 +0 -309
  175. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/m4/libtool.m4 +0 -7844
  176. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/m4/ltoptions.m4 +0 -369
  177. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/m4/ltsugar.m4 +0 -123
  178. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/m4/ltversion.m4 +0 -23
  179. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/m4/lt~obsolete.m4 +0 -98
  180. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/m4/pcre_visibility.m4 +0 -89
  181. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/makevp.bat +0 -66
  182. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/makevp_c.txt +0 -20
  183. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/makevp_l.txt +0 -20
  184. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/missing +0 -331
  185. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre-config.in +0 -133
  186. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre.h +0 -653
  187. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre.h.generic +0 -653
  188. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre.h.in +0 -653
  189. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_byte_order.c +0 -45
  190. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_chartables.c +0 -45
  191. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_compile.c +0 -45
  192. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_config.c +0 -45
  193. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_dfa_exec.c +0 -45
  194. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_exec.c +0 -45
  195. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_fullinfo.c +0 -45
  196. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_get.c +0 -45
  197. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_globals.c +0 -45
  198. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_jit_compile.c +0 -45
  199. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_maketables.c +0 -45
  200. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_newline.c +0 -45
  201. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_ord2utf16.c +0 -90
  202. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_printint.c +0 -45
  203. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_refcount.c +0 -45
  204. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_string_utils.c +0 -45
  205. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_study.c +0 -45
  206. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_tables.c +0 -45
  207. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_ucd.c +0 -45
  208. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_utf16_utils.c +0 -130
  209. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_valid_utf16.c +0 -156
  210. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_version.c +0 -45
  211. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre16_xclass.c +0 -45
  212. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_byte_order.c +0 -45
  213. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_chartables.c +0 -45
  214. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_compile.c +0 -45
  215. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_config.c +0 -45
  216. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_dfa_exec.c +0 -45
  217. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_exec.c +0 -45
  218. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_fullinfo.c +0 -45
  219. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_get.c +0 -45
  220. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_globals.c +0 -45
  221. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_jit_compile.c +0 -45
  222. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_maketables.c +0 -45
  223. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_newline.c +0 -45
  224. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_ord2utf32.c +0 -82
  225. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_printint.c +0 -45
  226. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_refcount.c +0 -45
  227. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_string_utils.c +0 -45
  228. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_study.c +0 -45
  229. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_tables.c +0 -45
  230. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_ucd.c +0 -45
  231. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_utf32_utils.c +0 -141
  232. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_valid_utf32.c +0 -131
  233. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_version.c +0 -45
  234. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre32_xclass.c +0 -45
  235. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_byte_order.c +0 -318
  236. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_chartables.c +0 -198
  237. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_chartables.c.dist +0 -198
  238. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_compile.c +0 -8386
  239. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_config.c +0 -186
  240. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_dfa_exec.c +0 -3582
  241. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_exec.c +0 -7049
  242. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_fullinfo.c +0 -231
  243. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_get.c +0 -662
  244. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_globals.c +0 -84
  245. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_internal.h +0 -2744
  246. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_jit_compile.c +0 -8560
  247. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_jit_test.c +0 -1614
  248. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_maketables.c +0 -151
  249. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_newline.c +0 -210
  250. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_ord2utf8.c +0 -94
  251. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_printint.c +0 -766
  252. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_refcount.c +0 -92
  253. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_scanner.cc +0 -199
  254. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_scanner.h +0 -172
  255. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_scanner_unittest.cc +0 -159
  256. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_string_utils.c +0 -211
  257. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_stringpiece.cc +0 -43
  258. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_stringpiece.h.in +0 -179
  259. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_stringpiece_unittest.cc +0 -150
  260. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_study.c +0 -1562
  261. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_tables.c +0 -655
  262. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_ucd.c +0 -3298
  263. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_valid_utf8.c +0 -312
  264. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_version.c +0 -98
  265. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcre_xclass.c +0 -198
  266. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcrecpp.cc +0 -922
  267. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcrecpp.h +0 -710
  268. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcrecpp_internal.h +0 -71
  269. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcrecpp_unittest.cc +0 -1291
  270. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcrecpparg.h.in +0 -174
  271. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcredemo.c +0 -406
  272. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcregexp.pas +0 -845
  273. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcregrep.c +0 -3180
  274. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcreposix.c +0 -419
  275. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcreposix.h +0 -146
  276. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/pcretest.c +0 -5488
  277. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/perltest.pl +0 -237
  278. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/sljit/sljitConfig.h +0 -110
  279. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/sljit/sljitConfigInternal.h +0 -484
  280. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/sljit/sljitExecAllocator.c +0 -289
  281. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/sljit/sljitLir.c +0 -1766
  282. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/sljit/sljitLir.h +0 -985
  283. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/sljit/sljitNativeARM_Thumb2.c +0 -2008
  284. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/sljit/sljitNativeARM_v5.c +0 -2515
  285. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/sljit/sljitNativeMIPS_32.c +0 -404
  286. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/sljit/sljitNativeMIPS_common.c +0 -1881
  287. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/sljit/sljitNativePPC_32.c +0 -269
  288. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/sljit/sljitNativePPC_64.c +0 -421
  289. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/sljit/sljitNativePPC_common.c +0 -2014
  290. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/sljit/sljitNativeSPARC_32.c +0 -164
  291. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/sljit/sljitNativeSPARC_common.c +0 -1348
  292. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/sljit/sljitNativeX86_32.c +0 -547
  293. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/sljit/sljitNativeX86_64.c +0 -810
  294. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/sljit/sljitNativeX86_common.c +0 -2836
  295. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/sljit/sljitUtils.c +0 -332
  296. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/grepbinary +0 -0
  297. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/grepfilelist +0 -3
  298. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/grepinput +0 -611
  299. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/grepinput3 +0 -15
  300. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/grepinput8 +0 -11
  301. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/grepinputv +0 -4
  302. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/grepinputx +0 -43
  303. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/greplist +0 -7
  304. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/grepoutput +0 -707
  305. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/grepoutput8 +0 -12
  306. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/grepoutputN +0 -17
  307. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/greppatN4 +0 -2
  308. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/saved16 +0 -0
  309. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/saved16BE-1 +0 -0
  310. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/saved16BE-2 +0 -0
  311. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/saved16LE-1 +0 -0
  312. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/saved16LE-2 +0 -0
  313. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/saved32 +0 -0
  314. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/saved32BE-1 +0 -0
  315. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/saved32BE-2 +0 -0
  316. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/saved32LE-1 +0 -0
  317. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/saved32LE-2 +0 -0
  318. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/saved8 +0 -0
  319. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput1 +0 -5306
  320. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput10 +0 -1337
  321. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput11 +0 -135
  322. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput12 +0 -89
  323. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput13 +0 -9
  324. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput14 +0 -329
  325. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput15 +0 -430
  326. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput16 +0 -35
  327. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput17 +0 -296
  328. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput18 +0 -296
  329. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput19 +0 -22
  330. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput2 +0 -3813
  331. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput20 +0 -19
  332. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput21 +0 -16
  333. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput22 +0 -13
  334. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput23 +0 -16
  335. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput24 +0 -77
  336. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput25 +0 -32
  337. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput26 +0 -80
  338. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput3 +0 -95
  339. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput4 +0 -624
  340. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput5 +0 -772
  341. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput6 +0 -1319
  342. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput7 +0 -672
  343. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput8 +0 -4801
  344. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinput9 +0 -717
  345. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testinputEBC +0 -121
  346. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput1 +0 -8798
  347. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput10 +0 -2726
  348. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput11-16 +0 -713
  349. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput11-32 +0 -713
  350. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput11-8 +0 -713
  351. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput12 +0 -181
  352. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput13 +0 -21
  353. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput14 +0 -476
  354. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput15 +0 -1269
  355. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput16 +0 -121
  356. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput17 +0 -505
  357. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput18-16 +0 -1022
  358. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput18-32 +0 -1019
  359. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput19 +0 -88
  360. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput2 +0 -12484
  361. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput20 +0 -27
  362. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput21-16 +0 -90
  363. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput21-32 +0 -90
  364. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput22-16 +0 -71
  365. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput22-32 +0 -71
  366. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput23 +0 -42
  367. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput24 +0 -145
  368. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput25 +0 -79
  369. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput26 +0 -148
  370. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput3 +0 -169
  371. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput4 +0 -1094
  372. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput5 +0 -1849
  373. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput6 +0 -2137
  374. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput7 +0 -1473
  375. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput8 +0 -8019
  376. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutput9 +0 -1371
  377. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/testoutputEBC +0 -182
  378. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/wintestinput3 +0 -91
  379. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/testdata/wintestoutput3 +0 -166
  380. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/pcre/ucp.h +0 -197
  381. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/run_test.rb +0 -23
  382. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/src/mruby_regexp_pcre.c +0 -364
  383. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/test/matchdata.rb +0 -94
  384. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/test/regexp.rb +0 -264
  385. data/resources/mruby/build/mrbgems/mruby-regexp-pcre/test/string.rb +0 -201
@@ -1,186 +0,0 @@
1
- /*************************************************
2
- * Perl-Compatible Regular Expressions *
3
- *************************************************/
4
-
5
- /* PCRE is a library of functions to support regular expressions whose syntax
6
- and semantics are as close as possible to those of the Perl 5 language.
7
-
8
- Written by Philip Hazel
9
- Copyright (c) 1997-2012 University of Cambridge
10
-
11
- -----------------------------------------------------------------------------
12
- Redistribution and use in source and binary forms, with or without
13
- modification, are permitted provided that the following conditions are met:
14
-
15
- * Redistributions of source code must retain the above copyright notice,
16
- this list of conditions and the following disclaimer.
17
-
18
- * Redistributions in binary form must reproduce the above copyright
19
- notice, this list of conditions and the following disclaimer in the
20
- documentation and/or other materials provided with the distribution.
21
-
22
- * Neither the name of the University of Cambridge nor the names of its
23
- contributors may be used to endorse or promote products derived from
24
- this software without specific prior written permission.
25
-
26
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29
- ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30
- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31
- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32
- SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33
- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34
- CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36
- POSSIBILITY OF SUCH DAMAGE.
37
- -----------------------------------------------------------------------------
38
- */
39
-
40
-
41
- /* This module contains the external function pcre_config(). */
42
-
43
-
44
- #ifdef HAVE_CONFIG_H
45
- #include "config.h"
46
- #endif
47
-
48
- /* Keep the original link size. */
49
- static int real_link_size = LINK_SIZE;
50
-
51
- #include "pcre_internal.h"
52
-
53
-
54
- /*************************************************
55
- * Return info about what features are configured *
56
- *************************************************/
57
-
58
- /* This function has an extensible interface so that additional items can be
59
- added compatibly.
60
-
61
- Arguments:
62
- what what information is required
63
- where where to put the information
64
-
65
- Returns: 0 if data returned, negative on error
66
- */
67
-
68
- #if defined COMPILE_PCRE8
69
- PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
70
- pcre_config(int what, void *where)
71
- #elif defined COMPILE_PCRE16
72
- PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
73
- pcre16_config(int what, void *where)
74
- #elif defined COMPILE_PCRE32
75
- PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
76
- pcre32_config(int what, void *where)
77
- #endif
78
- {
79
- switch (what)
80
- {
81
- case PCRE_CONFIG_UTF8:
82
- #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
83
- *((int *)where) = 0;
84
- return PCRE_ERROR_BADOPTION;
85
- #else
86
- #if defined SUPPORT_UTF
87
- *((int *)where) = 1;
88
- #else
89
- *((int *)where) = 0;
90
- #endif
91
- break;
92
- #endif
93
-
94
- case PCRE_CONFIG_UTF16:
95
- #if defined COMPILE_PCRE8 || defined COMPILE_PCRE32
96
- *((int *)where) = 0;
97
- return PCRE_ERROR_BADOPTION;
98
- #else
99
- #if defined SUPPORT_UTF
100
- *((int *)where) = 1;
101
- #else
102
- *((int *)where) = 0;
103
- #endif
104
- break;
105
- #endif
106
-
107
- case PCRE_CONFIG_UTF32:
108
- #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
109
- *((int *)where) = 0;
110
- return PCRE_ERROR_BADOPTION;
111
- #else
112
- #if defined SUPPORT_UTF
113
- *((int *)where) = 1;
114
- #else
115
- *((int *)where) = 0;
116
- #endif
117
- break;
118
- #endif
119
-
120
- case PCRE_CONFIG_UNICODE_PROPERTIES:
121
- #ifdef SUPPORT_UCP
122
- *((int *)where) = 1;
123
- #else
124
- *((int *)where) = 0;
125
- #endif
126
- break;
127
-
128
- case PCRE_CONFIG_JIT:
129
- #ifdef SUPPORT_JIT
130
- *((int *)where) = 1;
131
- #else
132
- *((int *)where) = 0;
133
- #endif
134
- break;
135
-
136
- case PCRE_CONFIG_JITTARGET:
137
- #ifdef SUPPORT_JIT
138
- *((const char **)where) = PRIV(jit_get_target)();
139
- #else
140
- *((const char **)where) = NULL;
141
- #endif
142
- break;
143
-
144
- case PCRE_CONFIG_NEWLINE:
145
- *((int *)where) = NEWLINE;
146
- break;
147
-
148
- case PCRE_CONFIG_BSR:
149
- #ifdef BSR_ANYCRLF
150
- *((int *)where) = 1;
151
- #else
152
- *((int *)where) = 0;
153
- #endif
154
- break;
155
-
156
- case PCRE_CONFIG_LINK_SIZE:
157
- *((int *)where) = real_link_size;
158
- break;
159
-
160
- case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:
161
- *((int *)where) = POSIX_MALLOC_THRESHOLD;
162
- break;
163
-
164
- case PCRE_CONFIG_MATCH_LIMIT:
165
- *((unsigned long int *)where) = MATCH_LIMIT;
166
- break;
167
-
168
- case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
169
- *((unsigned long int *)where) = MATCH_LIMIT_RECURSION;
170
- break;
171
-
172
- case PCRE_CONFIG_STACKRECURSE:
173
- #ifdef NO_RECURSE
174
- *((int *)where) = 0;
175
- #else
176
- *((int *)where) = 1;
177
- #endif
178
- break;
179
-
180
- default: return PCRE_ERROR_BADOPTION;
181
- }
182
-
183
- return 0;
184
- }
185
-
186
- /* End of pcre_config.c */
@@ -1,3582 +0,0 @@
1
- /*************************************************
2
- * Perl-Compatible Regular Expressions *
3
- *************************************************/
4
-
5
- /* PCRE is a library of functions to support regular expressions whose syntax
6
- and semantics are as close as possible to those of the Perl 5 language (but see
7
- below for why this module is different).
8
-
9
- Written by Philip Hazel
10
- Copyright (c) 1997-2012 University of Cambridge
11
-
12
- -----------------------------------------------------------------------------
13
- Redistribution and use in source and binary forms, with or without
14
- modification, are permitted provided that the following conditions are met:
15
-
16
- * Redistributions of source code must retain the above copyright notice,
17
- this list of conditions and the following disclaimer.
18
-
19
- * Redistributions in binary form must reproduce the above copyright
20
- notice, this list of conditions and the following disclaimer in the
21
- documentation and/or other materials provided with the distribution.
22
-
23
- * Neither the name of the University of Cambridge nor the names of its
24
- contributors may be used to endorse or promote products derived from
25
- this software without specific prior written permission.
26
-
27
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28
- AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29
- IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30
- ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31
- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32
- CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33
- SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34
- INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35
- CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36
- ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37
- POSSIBILITY OF SUCH DAMAGE.
38
- -----------------------------------------------------------------------------
39
- */
40
-
41
- /* This module contains the external function pcre_dfa_exec(), which is an
42
- alternative matching function that uses a sort of DFA algorithm (not a true
43
- FSM). This is NOT Perl-compatible, but it has advantages in certain
44
- applications. */
45
-
46
-
47
- /* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
48
- the performance of his patterns greatly. I could not use it as it stood, as it
49
- was not thread safe, and made assumptions about pattern sizes. Also, it caused
50
- test 7 to loop, and test 9 to crash with a segfault.
51
-
52
- The issue is the check for duplicate states, which is done by a simple linear
53
- search up the state list. (Grep for "duplicate" below to find the code.) For
54
- many patterns, there will never be many states active at one time, so a simple
55
- linear search is fine. In patterns that have many active states, it might be a
56
- bottleneck. The suggested code used an indexing scheme to remember which states
57
- had previously been used for each character, and avoided the linear search when
58
- it knew there was no chance of a duplicate. This was implemented when adding
59
- states to the state lists.
60
-
61
- I wrote some thread-safe, not-limited code to try something similar at the time
62
- of checking for duplicates (instead of when adding states), using index vectors
63
- on the stack. It did give a 13% improvement with one specially constructed
64
- pattern for certain subject strings, but on other strings and on many of the
65
- simpler patterns in the test suite it did worse. The major problem, I think,
66
- was the extra time to initialize the index. This had to be done for each call
67
- of internal_dfa_exec(). (The supplied patch used a static vector, initialized
68
- only once - I suspect this was the cause of the problems with the tests.)
69
-
70
- Overall, I concluded that the gains in some cases did not outweigh the losses
71
- in others, so I abandoned this code. */
72
-
73
-
74
-
75
- #ifdef HAVE_CONFIG_H
76
- #include "config.h"
77
- #endif
78
-
79
- #define NLBLOCK md /* Block containing newline information */
80
- #define PSSTART start_subject /* Field containing processed string start */
81
- #define PSEND end_subject /* Field containing processed string end */
82
-
83
- #include "pcre_internal.h"
84
-
85
-
86
- /* For use to indent debugging output */
87
-
88
- #define SP " "
89
-
90
-
91
- /*************************************************
92
- * Code parameters and static tables *
93
- *************************************************/
94
-
95
- /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
96
- into others, under special conditions. A gap of 20 between the blocks should be
97
- enough. The resulting opcodes don't have to be less than 256 because they are
98
- never stored, so we push them well clear of the normal opcodes. */
99
-
100
- #define OP_PROP_EXTRA 300
101
- #define OP_EXTUNI_EXTRA 320
102
- #define OP_ANYNL_EXTRA 340
103
- #define OP_HSPACE_EXTRA 360
104
- #define OP_VSPACE_EXTRA 380
105
-
106
-
107
- /* This table identifies those opcodes that are followed immediately by a
108
- character that is to be tested in some way. This makes it possible to
109
- centralize the loading of these characters. In the case of Type * etc, the
110
- "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
111
- small value. Non-zero values in the table are the offsets from the opcode where
112
- the character is to be found. ***NOTE*** If the start of this table is
113
- modified, the three tables that follow must also be modified. */
114
-
115
- static const pcre_uint8 coptable[] = {
116
- 0, /* End */
117
- 0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */
118
- 0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */
119
- 0, 0, 0, /* Any, AllAny, Anybyte */
120
- 0, 0, /* \P, \p */
121
- 0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
122
- 0, /* \X */
123
- 0, 0, 0, 0, 0, 0, /* \Z, \z, ^, ^M, $, $M */
124
- 1, /* Char */
125
- 1, /* Chari */
126
- 1, /* not */
127
- 1, /* noti */
128
- /* Positive single-char repeats */
129
- 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
130
- 1+IMM2_SIZE, 1+IMM2_SIZE, /* upto, minupto */
131
- 1+IMM2_SIZE, /* exact */
132
- 1, 1, 1, 1+IMM2_SIZE, /* *+, ++, ?+, upto+ */
133
- 1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */
134
- 1+IMM2_SIZE, 1+IMM2_SIZE, /* upto I, minupto I */
135
- 1+IMM2_SIZE, /* exact I */
136
- 1, 1, 1, 1+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */
137
- /* Negative single-char repeats - only for chars < 256 */
138
- 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
139
- 1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto, minupto */
140
- 1+IMM2_SIZE, /* NOT exact */
141
- 1, 1, 1, 1+IMM2_SIZE, /* NOT *+, ++, ?+, upto+ */
142
- 1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */
143
- 1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto I, minupto I */
144
- 1+IMM2_SIZE, /* NOT exact I */
145
- 1, 1, 1, 1+IMM2_SIZE, /* NOT *+I, ++I, ?+I, upto+I */
146
- /* Positive type repeats */
147
- 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
148
- 1+IMM2_SIZE, 1+IMM2_SIZE, /* Type upto, minupto */
149
- 1+IMM2_SIZE, /* Type exact */
150
- 1, 1, 1, 1+IMM2_SIZE, /* Type *+, ++, ?+, upto+ */
151
- /* Character class & ref repeats */
152
- 0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
153
- 0, 0, /* CRRANGE, CRMINRANGE */
154
- 0, /* CLASS */
155
- 0, /* NCLASS */
156
- 0, /* XCLASS - variable length */
157
- 0, /* REF */
158
- 0, /* REFI */
159
- 0, /* RECURSE */
160
- 0, /* CALLOUT */
161
- 0, /* Alt */
162
- 0, /* Ket */
163
- 0, /* KetRmax */
164
- 0, /* KetRmin */
165
- 0, /* KetRpos */
166
- 0, /* Reverse */
167
- 0, /* Assert */
168
- 0, /* Assert not */
169
- 0, /* Assert behind */
170
- 0, /* Assert behind not */
171
- 0, 0, /* ONCE, ONCE_NC */
172
- 0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
173
- 0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
174
- 0, 0, /* CREF, NCREF */
175
- 0, 0, /* RREF, NRREF */
176
- 0, /* DEF */
177
- 0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
178
- 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
179
- 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
180
- 0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
181
- 0, 0 /* CLOSE, SKIPZERO */
182
- };
183
-
184
- /* This table identifies those opcodes that inspect a character. It is used to
185
- remember the fact that a character could have been inspected when the end of
186
- the subject is reached. ***NOTE*** If the start of this table is modified, the
187
- two tables that follow must also be modified. */
188
-
189
- static const pcre_uint8 poptable[] = {
190
- 0, /* End */
191
- 0, 0, 0, 1, 1, /* \A, \G, \K, \B, \b */
192
- 1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */
193
- 1, 1, 1, /* Any, AllAny, Anybyte */
194
- 1, 1, /* \P, \p */
195
- 1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */
196
- 1, /* \X */
197
- 0, 0, 0, 0, 0, 0, /* \Z, \z, ^, ^M, $, $M */
198
- 1, /* Char */
199
- 1, /* Chari */
200
- 1, /* not */
201
- 1, /* noti */
202
- /* Positive single-char repeats */
203
- 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
204
- 1, 1, 1, /* upto, minupto, exact */
205
- 1, 1, 1, 1, /* *+, ++, ?+, upto+ */
206
- 1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */
207
- 1, 1, 1, /* upto I, minupto I, exact I */
208
- 1, 1, 1, 1, /* *+I, ++I, ?+I, upto+I */
209
- /* Negative single-char repeats - only for chars < 256 */
210
- 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
211
- 1, 1, 1, /* NOT upto, minupto, exact */
212
- 1, 1, 1, 1, /* NOT *+, ++, ?+, upto+ */
213
- 1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */
214
- 1, 1, 1, /* NOT upto I, minupto I, exact I */
215
- 1, 1, 1, 1, /* NOT *+I, ++I, ?+I, upto+I */
216
- /* Positive type repeats */
217
- 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
218
- 1, 1, 1, /* Type upto, minupto, exact */
219
- 1, 1, 1, 1, /* Type *+, ++, ?+, upto+ */
220
- /* Character class & ref repeats */
221
- 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
222
- 1, 1, /* CRRANGE, CRMINRANGE */
223
- 1, /* CLASS */
224
- 1, /* NCLASS */
225
- 1, /* XCLASS - variable length */
226
- 0, /* REF */
227
- 0, /* REFI */
228
- 0, /* RECURSE */
229
- 0, /* CALLOUT */
230
- 0, /* Alt */
231
- 0, /* Ket */
232
- 0, /* KetRmax */
233
- 0, /* KetRmin */
234
- 0, /* KetRpos */
235
- 0, /* Reverse */
236
- 0, /* Assert */
237
- 0, /* Assert not */
238
- 0, /* Assert behind */
239
- 0, /* Assert behind not */
240
- 0, 0, /* ONCE, ONCE_NC */
241
- 0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
242
- 0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */
243
- 0, 0, /* CREF, NCREF */
244
- 0, 0, /* RREF, NRREF */
245
- 0, /* DEF */
246
- 0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
247
- 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
248
- 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
249
- 0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
250
- 0, 0 /* CLOSE, SKIPZERO */
251
- };
252
-
253
- /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
254
- and \w */
255
-
256
- static const pcre_uint8 toptable1[] = {
257
- 0, 0, 0, 0, 0, 0,
258
- ctype_digit, ctype_digit,
259
- ctype_space, ctype_space,
260
- ctype_word, ctype_word,
261
- 0, 0 /* OP_ANY, OP_ALLANY */
262
- };
263
-
264
- static const pcre_uint8 toptable2[] = {
265
- 0, 0, 0, 0, 0, 0,
266
- ctype_digit, 0,
267
- ctype_space, 0,
268
- ctype_word, 0,
269
- 1, 1 /* OP_ANY, OP_ALLANY */
270
- };
271
-
272
-
273
- /* Structure for holding data about a particular state, which is in effect the
274
- current data for an active path through the match tree. It must consist
275
- entirely of ints because the working vector we are passed, and which we put
276
- these structures in, is a vector of ints. */
277
-
278
- typedef struct stateblock {
279
- int offset; /* Offset to opcode */
280
- int count; /* Count for repeats */
281
- int data; /* Some use extra data */
282
- } stateblock;
283
-
284
- #define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int))
285
-
286
-
287
- #ifdef PCRE_DEBUG
288
- /*************************************************
289
- * Print character string *
290
- *************************************************/
291
-
292
- /* Character string printing function for debugging.
293
-
294
- Arguments:
295
- p points to string
296
- length number of bytes
297
- f where to print
298
-
299
- Returns: nothing
300
- */
301
-
302
- static void
303
- pchars(const pcre_uchar *p, int length, FILE *f)
304
- {
305
- pcre_uint32 c;
306
- while (length-- > 0)
307
- {
308
- if (isprint(c = *(p++)))
309
- fprintf(f, "%c", c);
310
- else
311
- fprintf(f, "\\x{%02x}", c);
312
- }
313
- }
314
- #endif
315
-
316
-
317
-
318
- /*************************************************
319
- * Execute a Regular Expression - DFA engine *
320
- *************************************************/
321
-
322
- /* This internal function applies a compiled pattern to a subject string,
323
- starting at a given point, using a DFA engine. This function is called from the
324
- external one, possibly multiple times if the pattern is not anchored. The
325
- function calls itself recursively for some kinds of subpattern.
326
-
327
- Arguments:
328
- md the match_data block with fixed information
329
- this_start_code the opening bracket of this subexpression's code
330
- current_subject where we currently are in the subject string
331
- start_offset start offset in the subject string
332
- offsets vector to contain the matching string offsets
333
- offsetcount size of same
334
- workspace vector of workspace
335
- wscount size of same
336
- rlevel function call recursion level
337
-
338
- Returns: > 0 => number of match offset pairs placed in offsets
339
- = 0 => offsets overflowed; longest matches are present
340
- -1 => failed to match
341
- < -1 => some kind of unexpected problem
342
-
343
- The following macros are used for adding states to the two state vectors (one
344
- for the current character, one for the following character). */
345
-
346
- #define ADD_ACTIVE(x,y) \
347
- if (active_count++ < wscount) \
348
- { \
349
- next_active_state->offset = (x); \
350
- next_active_state->count = (y); \
351
- next_active_state++; \
352
- DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
353
- } \
354
- else return PCRE_ERROR_DFA_WSSIZE
355
-
356
- #define ADD_ACTIVE_DATA(x,y,z) \
357
- if (active_count++ < wscount) \
358
- { \
359
- next_active_state->offset = (x); \
360
- next_active_state->count = (y); \
361
- next_active_state->data = (z); \
362
- next_active_state++; \
363
- DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
364
- } \
365
- else return PCRE_ERROR_DFA_WSSIZE
366
-
367
- #define ADD_NEW(x,y) \
368
- if (new_count++ < wscount) \
369
- { \
370
- next_new_state->offset = (x); \
371
- next_new_state->count = (y); \
372
- next_new_state++; \
373
- DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
374
- } \
375
- else return PCRE_ERROR_DFA_WSSIZE
376
-
377
- #define ADD_NEW_DATA(x,y,z) \
378
- if (new_count++ < wscount) \
379
- { \
380
- next_new_state->offset = (x); \
381
- next_new_state->count = (y); \
382
- next_new_state->data = (z); \
383
- next_new_state++; \
384
- DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
385
- (x), (y), (z), __LINE__)); \
386
- } \
387
- else return PCRE_ERROR_DFA_WSSIZE
388
-
389
- /* And now, here is the code */
390
-
391
- static int
392
- internal_dfa_exec(
393
- dfa_match_data *md,
394
- const pcre_uchar *this_start_code,
395
- const pcre_uchar *current_subject,
396
- int start_offset,
397
- int *offsets,
398
- int offsetcount,
399
- int *workspace,
400
- int wscount,
401
- int rlevel)
402
- {
403
- stateblock *active_states, *new_states, *temp_states;
404
- stateblock *next_active_state, *next_new_state;
405
-
406
- const pcre_uint8 *ctypes, *lcc, *fcc;
407
- const pcre_uchar *ptr;
408
- const pcre_uchar *end_code, *first_op;
409
-
410
- dfa_recursion_info new_recursive;
411
-
412
- int active_count, new_count, match_count;
413
-
414
- /* Some fields in the md block are frequently referenced, so we load them into
415
- independent variables in the hope that this will perform better. */
416
-
417
- const pcre_uchar *start_subject = md->start_subject;
418
- const pcre_uchar *end_subject = md->end_subject;
419
- const pcre_uchar *start_code = md->start_code;
420
-
421
- #ifdef SUPPORT_UTF
422
- BOOL utf = (md->poptions & PCRE_UTF8) != 0;
423
- #else
424
- BOOL utf = FALSE;
425
- #endif
426
-
427
- BOOL reset_could_continue = FALSE;
428
-
429
- rlevel++;
430
- offsetcount &= (-2);
431
-
432
- wscount -= 2;
433
- wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
434
- (2 * INTS_PER_STATEBLOCK);
435
-
436
- DPRINTF(("\n%.*s---------------------\n"
437
- "%.*sCall to internal_dfa_exec f=%d\n",
438
- rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
439
-
440
- ctypes = md->tables + ctypes_offset;
441
- lcc = md->tables + lcc_offset;
442
- fcc = md->tables + fcc_offset;
443
-
444
- match_count = PCRE_ERROR_NOMATCH; /* A negative number */
445
-
446
- active_states = (stateblock *)(workspace + 2);
447
- next_new_state = new_states = active_states + wscount;
448
- new_count = 0;
449
-
450
- first_op = this_start_code + 1 + LINK_SIZE +
451
- ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
452
- *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
453
- ? IMM2_SIZE:0);
454
-
455
- /* The first thing in any (sub) pattern is a bracket of some sort. Push all
456
- the alternative states onto the list, and find out where the end is. This
457
- makes it possible to use this function recursively, when we want to stop at a
458
- matching internal ket rather than at the end.
459
-
460
- If the first opcode in the first alternative is OP_REVERSE, we are dealing with
461
- a backward assertion. In that case, we have to find out the maximum amount to
462
- move back, and set up each alternative appropriately. */
463
-
464
- if (*first_op == OP_REVERSE)
465
- {
466
- int max_back = 0;
467
- int gone_back;
468
-
469
- end_code = this_start_code;
470
- do
471
- {
472
- int back = GET(end_code, 2+LINK_SIZE);
473
- if (back > max_back) max_back = back;
474
- end_code += GET(end_code, 1);
475
- }
476
- while (*end_code == OP_ALT);
477
-
478
- /* If we can't go back the amount required for the longest lookbehind
479
- pattern, go back as far as we can; some alternatives may still be viable. */
480
-
481
- #ifdef SUPPORT_UTF
482
- /* In character mode we have to step back character by character */
483
-
484
- if (utf)
485
- {
486
- for (gone_back = 0; gone_back < max_back; gone_back++)
487
- {
488
- if (current_subject <= start_subject) break;
489
- current_subject--;
490
- ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
491
- }
492
- }
493
- else
494
- #endif
495
-
496
- /* In byte-mode we can do this quickly. */
497
-
498
- {
499
- gone_back = (current_subject - max_back < start_subject)?
500
- (int)(current_subject - start_subject) : max_back;
501
- current_subject -= gone_back;
502
- }
503
-
504
- /* Save the earliest consulted character */
505
-
506
- if (current_subject < md->start_used_ptr)
507
- md->start_used_ptr = current_subject;
508
-
509
- /* Now we can process the individual branches. */
510
-
511
- end_code = this_start_code;
512
- do
513
- {
514
- int back = GET(end_code, 2+LINK_SIZE);
515
- if (back <= gone_back)
516
- {
517
- int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
518
- ADD_NEW_DATA(-bstate, 0, gone_back - back);
519
- }
520
- end_code += GET(end_code, 1);
521
- }
522
- while (*end_code == OP_ALT);
523
- }
524
-
525
- /* This is the code for a "normal" subpattern (not a backward assertion). The
526
- start of a whole pattern is always one of these. If we are at the top level,
527
- we may be asked to restart matching from the same point that we reached for a
528
- previous partial match. We still have to scan through the top-level branches to
529
- find the end state. */
530
-
531
- else
532
- {
533
- end_code = this_start_code;
534
-
535
- /* Restarting */
536
-
537
- if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0)
538
- {
539
- do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
540
- new_count = workspace[1];
541
- if (!workspace[0])
542
- memcpy(new_states, active_states, new_count * sizeof(stateblock));
543
- }
544
-
545
- /* Not restarting */
546
-
547
- else
548
- {
549
- int length = 1 + LINK_SIZE +
550
- ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
551
- *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
552
- ? IMM2_SIZE:0);
553
- do
554
- {
555
- ADD_NEW((int)(end_code - start_code + length), 0);
556
- end_code += GET(end_code, 1);
557
- length = 1 + LINK_SIZE;
558
- }
559
- while (*end_code == OP_ALT);
560
- }
561
- }
562
-
563
- workspace[0] = 0; /* Bit indicating which vector is current */
564
-
565
- DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
566
-
567
- /* Loop for scanning the subject */
568
-
569
- ptr = current_subject;
570
- for (;;)
571
- {
572
- int i, j;
573
- int clen, dlen;
574
- pcre_uint32 c, d;
575
- int forced_fail = 0;
576
- BOOL partial_newline = FALSE;
577
- BOOL could_continue = reset_could_continue;
578
- reset_could_continue = FALSE;
579
-
580
- /* Make the new state list into the active state list and empty the
581
- new state list. */
582
-
583
- temp_states = active_states;
584
- active_states = new_states;
585
- new_states = temp_states;
586
- active_count = new_count;
587
- new_count = 0;
588
-
589
- workspace[0] ^= 1; /* Remember for the restarting feature */
590
- workspace[1] = active_count;
591
-
592
- #ifdef PCRE_DEBUG
593
- printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
594
- pchars(ptr, STRLEN_UC(ptr), stdout);
595
- printf("\"\n");
596
-
597
- printf("%.*sActive states: ", rlevel*2-2, SP);
598
- for (i = 0; i < active_count; i++)
599
- printf("%d/%d ", active_states[i].offset, active_states[i].count);
600
- printf("\n");
601
- #endif
602
-
603
- /* Set the pointers for adding new states */
604
-
605
- next_active_state = active_states + active_count;
606
- next_new_state = new_states;
607
-
608
- /* Load the current character from the subject outside the loop, as many
609
- different states may want to look at it, and we assume that at least one
610
- will. */
611
-
612
- if (ptr < end_subject)
613
- {
614
- clen = 1; /* Number of data items in the character */
615
- #ifdef SUPPORT_UTF
616
- GETCHARLENTEST(c, ptr, clen);
617
- #else
618
- c = *ptr;
619
- #endif /* SUPPORT_UTF */
620
- }
621
- else
622
- {
623
- clen = 0; /* This indicates the end of the subject */
624
- c = NOTACHAR; /* This value should never actually be used */
625
- }
626
-
627
- /* Scan up the active states and act on each one. The result of an action
628
- may be to add more states to the currently active list (e.g. on hitting a
629
- parenthesis) or it may be to put states on the new list, for considering
630
- when we move the character pointer on. */
631
-
632
- for (i = 0; i < active_count; i++)
633
- {
634
- stateblock *current_state = active_states + i;
635
- BOOL caseless = FALSE;
636
- const pcre_uchar *code;
637
- int state_offset = current_state->offset;
638
- int codevalue, rrc;
639
- unsigned int count;
640
-
641
- #ifdef PCRE_DEBUG
642
- printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
643
- if (clen == 0) printf("EOL\n");
644
- else if (c > 32 && c < 127) printf("'%c'\n", c);
645
- else printf("0x%02x\n", c);
646
- #endif
647
-
648
- /* A negative offset is a special case meaning "hold off going to this
649
- (negated) state until the number of characters in the data field have
650
- been skipped". If the could_continue flag was passed over from a previous
651
- state, arrange for it to be passed on. */
652
-
653
- if (state_offset < 0)
654
- {
655
- if (current_state->data > 0)
656
- {
657
- DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
658
- ADD_NEW_DATA(state_offset, current_state->count,
659
- current_state->data - 1);
660
- if (could_continue) reset_could_continue = TRUE;
661
- continue;
662
- }
663
- else
664
- {
665
- current_state->offset = state_offset = -state_offset;
666
- }
667
- }
668
-
669
- /* Check for a duplicate state with the same count, and skip if found.
670
- See the note at the head of this module about the possibility of improving
671
- performance here. */
672
-
673
- for (j = 0; j < i; j++)
674
- {
675
- if (active_states[j].offset == state_offset &&
676
- active_states[j].count == current_state->count)
677
- {
678
- DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP));
679
- goto NEXT_ACTIVE_STATE;
680
- }
681
- }
682
-
683
- /* The state offset is the offset to the opcode */
684
-
685
- code = start_code + state_offset;
686
- codevalue = *code;
687
-
688
- /* If this opcode inspects a character, but we are at the end of the
689
- subject, remember the fact for use when testing for a partial match. */
690
-
691
- if (clen == 0 && poptable[codevalue] != 0)
692
- could_continue = TRUE;
693
-
694
- /* If this opcode is followed by an inline character, load it. It is
695
- tempting to test for the presence of a subject character here, but that
696
- is wrong, because sometimes zero repetitions of the subject are
697
- permitted.
698
-
699
- We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
700
- argument that is not a data character - but is always one byte long because
701
- the values are small. We have to take special action to deal with \P, \p,
702
- \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
703
- these ones to new opcodes. */
704
-
705
- if (coptable[codevalue] > 0)
706
- {
707
- dlen = 1;
708
- #ifdef SUPPORT_UTF
709
- if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
710
- #endif /* SUPPORT_UTF */
711
- d = code[coptable[codevalue]];
712
- if (codevalue >= OP_TYPESTAR)
713
- {
714
- switch(d)
715
- {
716
- case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
717
- case OP_NOTPROP:
718
- case OP_PROP: codevalue += OP_PROP_EXTRA; break;
719
- case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
720
- case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
721
- case OP_NOT_HSPACE:
722
- case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
723
- case OP_NOT_VSPACE:
724
- case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
725
- default: break;
726
- }
727
- }
728
- }
729
- else
730
- {
731
- dlen = 0; /* Not strictly necessary, but compilers moan */
732
- d = NOTACHAR; /* if these variables are not set. */
733
- }
734
-
735
-
736
- /* Now process the individual opcodes */
737
-
738
- switch (codevalue)
739
- {
740
- /* ========================================================================== */
741
- /* These cases are never obeyed. This is a fudge that causes a compile-
742
- time error if the vectors coptable or poptable, which are indexed by
743
- opcode, are not the correct length. It seems to be the only way to do
744
- such a check at compile time, as the sizeof() operator does not work
745
- in the C preprocessor. */
746
-
747
- case OP_TABLE_LENGTH:
748
- case OP_TABLE_LENGTH +
749
- ((sizeof(coptable) == OP_TABLE_LENGTH) &&
750
- (sizeof(poptable) == OP_TABLE_LENGTH)):
751
- break;
752
-
753
- /* ========================================================================== */
754
- /* Reached a closing bracket. If not at the end of the pattern, carry
755
- on with the next opcode. For repeating opcodes, also add the repeat
756
- state. Note that KETRPOS will always be encountered at the end of the
757
- subpattern, because the possessive subpattern repeats are always handled
758
- using recursive calls. Thus, it never adds any new states.
759
-
760
- At the end of the (sub)pattern, unless we have an empty string and
761
- PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
762
- start of the subject, save the match data, shifting up all previous
763
- matches so we always have the longest first. */
764
-
765
- case OP_KET:
766
- case OP_KETRMIN:
767
- case OP_KETRMAX:
768
- case OP_KETRPOS:
769
- if (code != end_code)
770
- {
771
- ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
772
- if (codevalue != OP_KET)
773
- {
774
- ADD_ACTIVE(state_offset - GET(code, 1), 0);
775
- }
776
- }
777
- else
778
- {
779
- if (ptr > current_subject ||
780
- ((md->moptions & PCRE_NOTEMPTY) == 0 &&
781
- ((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 ||
782
- current_subject > start_subject + md->start_offset)))
783
- {
784
- if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
785
- else if (match_count > 0 && ++match_count * 2 > offsetcount)
786
- match_count = 0;
787
- count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
788
- if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
789
- if (offsetcount >= 2)
790
- {
791
- offsets[0] = (int)(current_subject - start_subject);
792
- offsets[1] = (int)(ptr - start_subject);
793
- DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
794
- offsets[1] - offsets[0], (char *)current_subject));
795
- }
796
- if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
797
- {
798
- DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
799
- "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
800
- match_count, rlevel*2-2, SP));
801
- return match_count;
802
- }
803
- }
804
- }
805
- break;
806
-
807
- /* ========================================================================== */
808
- /* These opcodes add to the current list of states without looking
809
- at the current character. */
810
-
811
- /*-----------------------------------------------------------------*/
812
- case OP_ALT:
813
- do { code += GET(code, 1); } while (*code == OP_ALT);
814
- ADD_ACTIVE((int)(code - start_code), 0);
815
- break;
816
-
817
- /*-----------------------------------------------------------------*/
818
- case OP_BRA:
819
- case OP_SBRA:
820
- do
821
- {
822
- ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
823
- code += GET(code, 1);
824
- }
825
- while (*code == OP_ALT);
826
- break;
827
-
828
- /*-----------------------------------------------------------------*/
829
- case OP_CBRA:
830
- case OP_SCBRA:
831
- ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE), 0);
832
- code += GET(code, 1);
833
- while (*code == OP_ALT)
834
- {
835
- ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
836
- code += GET(code, 1);
837
- }
838
- break;
839
-
840
- /*-----------------------------------------------------------------*/
841
- case OP_BRAZERO:
842
- case OP_BRAMINZERO:
843
- ADD_ACTIVE(state_offset + 1, 0);
844
- code += 1 + GET(code, 2);
845
- while (*code == OP_ALT) code += GET(code, 1);
846
- ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
847
- break;
848
-
849
- /*-----------------------------------------------------------------*/
850
- case OP_SKIPZERO:
851
- code += 1 + GET(code, 2);
852
- while (*code == OP_ALT) code += GET(code, 1);
853
- ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
854
- break;
855
-
856
- /*-----------------------------------------------------------------*/
857
- case OP_CIRC:
858
- if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0)
859
- { ADD_ACTIVE(state_offset + 1, 0); }
860
- break;
861
-
862
- /*-----------------------------------------------------------------*/
863
- case OP_CIRCM:
864
- if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
865
- (ptr != end_subject && WAS_NEWLINE(ptr)))
866
- { ADD_ACTIVE(state_offset + 1, 0); }
867
- break;
868
-
869
- /*-----------------------------------------------------------------*/
870
- case OP_EOD:
871
- if (ptr >= end_subject)
872
- {
873
- if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
874
- could_continue = TRUE;
875
- else { ADD_ACTIVE(state_offset + 1, 0); }
876
- }
877
- break;
878
-
879
- /*-----------------------------------------------------------------*/
880
- case OP_SOD:
881
- if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
882
- break;
883
-
884
- /*-----------------------------------------------------------------*/
885
- case OP_SOM:
886
- if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
887
- break;
888
-
889
-
890
- /* ========================================================================== */
891
- /* These opcodes inspect the next subject character, and sometimes
892
- the previous one as well, but do not have an argument. The variable
893
- clen contains the length of the current character and is zero if we are
894
- at the end of the subject. */
895
-
896
- /*-----------------------------------------------------------------*/
897
- case OP_ANY:
898
- if (clen > 0 && !IS_NEWLINE(ptr))
899
- {
900
- if (ptr + 1 >= md->end_subject &&
901
- (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
902
- NLBLOCK->nltype == NLTYPE_FIXED &&
903
- NLBLOCK->nllen == 2 &&
904
- c == NLBLOCK->nl[0])
905
- {
906
- could_continue = partial_newline = TRUE;
907
- }
908
- else
909
- {
910
- ADD_NEW(state_offset + 1, 0);
911
- }
912
- }
913
- break;
914
-
915
- /*-----------------------------------------------------------------*/
916
- case OP_ALLANY:
917
- if (clen > 0)
918
- { ADD_NEW(state_offset + 1, 0); }
919
- break;
920
-
921
- /*-----------------------------------------------------------------*/
922
- case OP_EODN:
923
- if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
924
- could_continue = TRUE;
925
- else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
926
- { ADD_ACTIVE(state_offset + 1, 0); }
927
- break;
928
-
929
- /*-----------------------------------------------------------------*/
930
- case OP_DOLL:
931
- if ((md->moptions & PCRE_NOTEOL) == 0)
932
- {
933
- if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
934
- could_continue = TRUE;
935
- else if (clen == 0 ||
936
- ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
937
- (ptr == end_subject - md->nllen)
938
- ))
939
- { ADD_ACTIVE(state_offset + 1, 0); }
940
- else if (ptr + 1 >= md->end_subject &&
941
- (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
942
- NLBLOCK->nltype == NLTYPE_FIXED &&
943
- NLBLOCK->nllen == 2 &&
944
- c == NLBLOCK->nl[0])
945
- {
946
- if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
947
- {
948
- reset_could_continue = TRUE;
949
- ADD_NEW_DATA(-(state_offset + 1), 0, 1);
950
- }
951
- else could_continue = partial_newline = TRUE;
952
- }
953
- }
954
- break;
955
-
956
- /*-----------------------------------------------------------------*/
957
- case OP_DOLLM:
958
- if ((md->moptions & PCRE_NOTEOL) == 0)
959
- {
960
- if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
961
- could_continue = TRUE;
962
- else if (clen == 0 ||
963
- ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
964
- { ADD_ACTIVE(state_offset + 1, 0); }
965
- else if (ptr + 1 >= md->end_subject &&
966
- (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
967
- NLBLOCK->nltype == NLTYPE_FIXED &&
968
- NLBLOCK->nllen == 2 &&
969
- c == NLBLOCK->nl[0])
970
- {
971
- if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
972
- {
973
- reset_could_continue = TRUE;
974
- ADD_NEW_DATA(-(state_offset + 1), 0, 1);
975
- }
976
- else could_continue = partial_newline = TRUE;
977
- }
978
- }
979
- else if (IS_NEWLINE(ptr))
980
- { ADD_ACTIVE(state_offset + 1, 0); }
981
- break;
982
-
983
- /*-----------------------------------------------------------------*/
984
-
985
- case OP_DIGIT:
986
- case OP_WHITESPACE:
987
- case OP_WORDCHAR:
988
- if (clen > 0 && c < 256 &&
989
- ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
990
- { ADD_NEW(state_offset + 1, 0); }
991
- break;
992
-
993
- /*-----------------------------------------------------------------*/
994
- case OP_NOT_DIGIT:
995
- case OP_NOT_WHITESPACE:
996
- case OP_NOT_WORDCHAR:
997
- if (clen > 0 && (c >= 256 ||
998
- ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
999
- { ADD_NEW(state_offset + 1, 0); }
1000
- break;
1001
-
1002
- /*-----------------------------------------------------------------*/
1003
- case OP_WORD_BOUNDARY:
1004
- case OP_NOT_WORD_BOUNDARY:
1005
- {
1006
- int left_word, right_word;
1007
-
1008
- if (ptr > start_subject)
1009
- {
1010
- const pcre_uchar *temp = ptr - 1;
1011
- if (temp < md->start_used_ptr) md->start_used_ptr = temp;
1012
- #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1013
- if (utf) { BACKCHAR(temp); }
1014
- #endif
1015
- GETCHARTEST(d, temp);
1016
- #ifdef SUPPORT_UCP
1017
- if ((md->poptions & PCRE_UCP) != 0)
1018
- {
1019
- if (d == '_') left_word = TRUE; else
1020
- {
1021
- int cat = UCD_CATEGORY(d);
1022
- left_word = (cat == ucp_L || cat == ucp_N);
1023
- }
1024
- }
1025
- else
1026
- #endif
1027
- left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
1028
- }
1029
- else left_word = FALSE;
1030
-
1031
- if (clen > 0)
1032
- {
1033
- #ifdef SUPPORT_UCP
1034
- if ((md->poptions & PCRE_UCP) != 0)
1035
- {
1036
- if (c == '_') right_word = TRUE; else
1037
- {
1038
- int cat = UCD_CATEGORY(c);
1039
- right_word = (cat == ucp_L || cat == ucp_N);
1040
- }
1041
- }
1042
- else
1043
- #endif
1044
- right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
1045
- }
1046
- else right_word = FALSE;
1047
-
1048
- if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
1049
- { ADD_ACTIVE(state_offset + 1, 0); }
1050
- }
1051
- break;
1052
-
1053
-
1054
- /*-----------------------------------------------------------------*/
1055
- /* Check the next character by Unicode property. We will get here only
1056
- if the support is in the binary; otherwise a compile-time error occurs.
1057
- */
1058
-
1059
- #ifdef SUPPORT_UCP
1060
- case OP_PROP:
1061
- case OP_NOTPROP:
1062
- if (clen > 0)
1063
- {
1064
- BOOL OK;
1065
- const pcre_uint32 *cp;
1066
- const ucd_record * prop = GET_UCD(c);
1067
- switch(code[1])
1068
- {
1069
- case PT_ANY:
1070
- OK = TRUE;
1071
- break;
1072
-
1073
- case PT_LAMP:
1074
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1075
- prop->chartype == ucp_Lt;
1076
- break;
1077
-
1078
- case PT_GC:
1079
- OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
1080
- break;
1081
-
1082
- case PT_PC:
1083
- OK = prop->chartype == code[2];
1084
- break;
1085
-
1086
- case PT_SC:
1087
- OK = prop->script == code[2];
1088
- break;
1089
-
1090
- /* These are specials for combination cases. */
1091
-
1092
- case PT_ALNUM:
1093
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1094
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1095
- break;
1096
-
1097
- case PT_SPACE: /* Perl space */
1098
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1099
- c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1100
- break;
1101
-
1102
- case PT_PXSPACE: /* POSIX space */
1103
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1104
- c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1105
- c == CHAR_FF || c == CHAR_CR;
1106
- break;
1107
-
1108
- case PT_WORD:
1109
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1110
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1111
- c == CHAR_UNDERSCORE;
1112
- break;
1113
-
1114
- case PT_CLIST:
1115
- cp = PRIV(ucd_caseless_sets) + code[2];
1116
- for (;;)
1117
- {
1118
- if (c < *cp) { OK = FALSE; break; }
1119
- if (c == *cp++) { OK = TRUE; break; }
1120
- }
1121
- break;
1122
-
1123
- /* Should never occur, but keep compilers from grumbling. */
1124
-
1125
- default:
1126
- OK = codevalue != OP_PROP;
1127
- break;
1128
- }
1129
-
1130
- if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
1131
- }
1132
- break;
1133
- #endif
1134
-
1135
-
1136
-
1137
- /* ========================================================================== */
1138
- /* These opcodes likewise inspect the subject character, but have an
1139
- argument that is not a data character. It is one of these opcodes:
1140
- OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE,
1141
- OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
1142
-
1143
- case OP_TYPEPLUS:
1144
- case OP_TYPEMINPLUS:
1145
- case OP_TYPEPOSPLUS:
1146
- count = current_state->count; /* Already matched */
1147
- if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1148
- if (clen > 0)
1149
- {
1150
- if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1151
- (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1152
- NLBLOCK->nltype == NLTYPE_FIXED &&
1153
- NLBLOCK->nllen == 2 &&
1154
- c == NLBLOCK->nl[0])
1155
- {
1156
- could_continue = partial_newline = TRUE;
1157
- }
1158
- else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1159
- (c < 256 &&
1160
- (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1161
- ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1162
- {
1163
- if (count > 0 && codevalue == OP_TYPEPOSPLUS)
1164
- {
1165
- active_count--; /* Remove non-match possibility */
1166
- next_active_state--;
1167
- }
1168
- count++;
1169
- ADD_NEW(state_offset, count);
1170
- }
1171
- }
1172
- break;
1173
-
1174
- /*-----------------------------------------------------------------*/
1175
- case OP_TYPEQUERY:
1176
- case OP_TYPEMINQUERY:
1177
- case OP_TYPEPOSQUERY:
1178
- ADD_ACTIVE(state_offset + 2, 0);
1179
- if (clen > 0)
1180
- {
1181
- if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1182
- (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1183
- NLBLOCK->nltype == NLTYPE_FIXED &&
1184
- NLBLOCK->nllen == 2 &&
1185
- c == NLBLOCK->nl[0])
1186
- {
1187
- could_continue = partial_newline = TRUE;
1188
- }
1189
- else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1190
- (c < 256 &&
1191
- (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1192
- ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1193
- {
1194
- if (codevalue == OP_TYPEPOSQUERY)
1195
- {
1196
- active_count--; /* Remove non-match possibility */
1197
- next_active_state--;
1198
- }
1199
- ADD_NEW(state_offset + 2, 0);
1200
- }
1201
- }
1202
- break;
1203
-
1204
- /*-----------------------------------------------------------------*/
1205
- case OP_TYPESTAR:
1206
- case OP_TYPEMINSTAR:
1207
- case OP_TYPEPOSSTAR:
1208
- ADD_ACTIVE(state_offset + 2, 0);
1209
- if (clen > 0)
1210
- {
1211
- if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1212
- (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1213
- NLBLOCK->nltype == NLTYPE_FIXED &&
1214
- NLBLOCK->nllen == 2 &&
1215
- c == NLBLOCK->nl[0])
1216
- {
1217
- could_continue = partial_newline = TRUE;
1218
- }
1219
- else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1220
- (c < 256 &&
1221
- (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1222
- ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1223
- {
1224
- if (codevalue == OP_TYPEPOSSTAR)
1225
- {
1226
- active_count--; /* Remove non-match possibility */
1227
- next_active_state--;
1228
- }
1229
- ADD_NEW(state_offset, 0);
1230
- }
1231
- }
1232
- break;
1233
-
1234
- /*-----------------------------------------------------------------*/
1235
- case OP_TYPEEXACT:
1236
- count = current_state->count; /* Number already matched */
1237
- if (clen > 0)
1238
- {
1239
- if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1240
- (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1241
- NLBLOCK->nltype == NLTYPE_FIXED &&
1242
- NLBLOCK->nllen == 2 &&
1243
- c == NLBLOCK->nl[0])
1244
- {
1245
- could_continue = partial_newline = TRUE;
1246
- }
1247
- else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1248
- (c < 256 &&
1249
- (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1250
- ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1251
- {
1252
- if (++count >= GET2(code, 1))
1253
- { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1254
- else
1255
- { ADD_NEW(state_offset, count); }
1256
- }
1257
- }
1258
- break;
1259
-
1260
- /*-----------------------------------------------------------------*/
1261
- case OP_TYPEUPTO:
1262
- case OP_TYPEMINUPTO:
1263
- case OP_TYPEPOSUPTO:
1264
- ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
1265
- count = current_state->count; /* Number already matched */
1266
- if (clen > 0)
1267
- {
1268
- if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1269
- (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1270
- NLBLOCK->nltype == NLTYPE_FIXED &&
1271
- NLBLOCK->nllen == 2 &&
1272
- c == NLBLOCK->nl[0])
1273
- {
1274
- could_continue = partial_newline = TRUE;
1275
- }
1276
- else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1277
- (c < 256 &&
1278
- (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1279
- ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1280
- {
1281
- if (codevalue == OP_TYPEPOSUPTO)
1282
- {
1283
- active_count--; /* Remove non-match possibility */
1284
- next_active_state--;
1285
- }
1286
- if (++count >= GET2(code, 1))
1287
- { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1288
- else
1289
- { ADD_NEW(state_offset, count); }
1290
- }
1291
- }
1292
- break;
1293
-
1294
- /* ========================================================================== */
1295
- /* These are virtual opcodes that are used when something like
1296
- OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
1297
- argument. It keeps the code above fast for the other cases. The argument
1298
- is in the d variable. */
1299
-
1300
- #ifdef SUPPORT_UCP
1301
- case OP_PROP_EXTRA + OP_TYPEPLUS:
1302
- case OP_PROP_EXTRA + OP_TYPEMINPLUS:
1303
- case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
1304
- count = current_state->count; /* Already matched */
1305
- if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
1306
- if (clen > 0)
1307
- {
1308
- BOOL OK;
1309
- const pcre_uint32 *cp;
1310
- const ucd_record * prop = GET_UCD(c);
1311
- switch(code[2])
1312
- {
1313
- case PT_ANY:
1314
- OK = TRUE;
1315
- break;
1316
-
1317
- case PT_LAMP:
1318
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1319
- prop->chartype == ucp_Lt;
1320
- break;
1321
-
1322
- case PT_GC:
1323
- OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1324
- break;
1325
-
1326
- case PT_PC:
1327
- OK = prop->chartype == code[3];
1328
- break;
1329
-
1330
- case PT_SC:
1331
- OK = prop->script == code[3];
1332
- break;
1333
-
1334
- /* These are specials for combination cases. */
1335
-
1336
- case PT_ALNUM:
1337
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1338
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1339
- break;
1340
-
1341
- case PT_SPACE: /* Perl space */
1342
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1343
- c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1344
- break;
1345
-
1346
- case PT_PXSPACE: /* POSIX space */
1347
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1348
- c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1349
- c == CHAR_FF || c == CHAR_CR;
1350
- break;
1351
-
1352
- case PT_WORD:
1353
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1354
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1355
- c == CHAR_UNDERSCORE;
1356
- break;
1357
-
1358
- case PT_CLIST:
1359
- cp = PRIV(ucd_caseless_sets) + code[3];
1360
- for (;;)
1361
- {
1362
- if (c < *cp) { OK = FALSE; break; }
1363
- if (c == *cp++) { OK = TRUE; break; }
1364
- }
1365
- break;
1366
-
1367
- /* Should never occur, but keep compilers from grumbling. */
1368
-
1369
- default:
1370
- OK = codevalue != OP_PROP;
1371
- break;
1372
- }
1373
-
1374
- if (OK == (d == OP_PROP))
1375
- {
1376
- if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1377
- {
1378
- active_count--; /* Remove non-match possibility */
1379
- next_active_state--;
1380
- }
1381
- count++;
1382
- ADD_NEW(state_offset, count);
1383
- }
1384
- }
1385
- break;
1386
-
1387
- /*-----------------------------------------------------------------*/
1388
- case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1389
- case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1390
- case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1391
- count = current_state->count; /* Already matched */
1392
- if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1393
- if (clen > 0)
1394
- {
1395
- int lgb, rgb;
1396
- const pcre_uchar *nptr = ptr + clen;
1397
- int ncount = 0;
1398
- if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1399
- {
1400
- active_count--; /* Remove non-match possibility */
1401
- next_active_state--;
1402
- }
1403
- lgb = UCD_GRAPHBREAK(c);
1404
- while (nptr < end_subject)
1405
- {
1406
- dlen = 1;
1407
- if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1408
- rgb = UCD_GRAPHBREAK(d);
1409
- if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1410
- ncount++;
1411
- lgb = rgb;
1412
- nptr += dlen;
1413
- }
1414
- count++;
1415
- ADD_NEW_DATA(-state_offset, count, ncount);
1416
- }
1417
- break;
1418
- #endif
1419
-
1420
- /*-----------------------------------------------------------------*/
1421
- case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1422
- case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1423
- case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1424
- count = current_state->count; /* Already matched */
1425
- if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1426
- if (clen > 0)
1427
- {
1428
- int ncount = 0;
1429
- switch (c)
1430
- {
1431
- case CHAR_VT:
1432
- case CHAR_FF:
1433
- case CHAR_NEL:
1434
- #ifndef EBCDIC
1435
- case 0x2028:
1436
- case 0x2029:
1437
- #endif /* Not EBCDIC */
1438
- if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1439
- goto ANYNL01;
1440
-
1441
- case CHAR_CR:
1442
- if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1443
- /* Fall through */
1444
-
1445
- ANYNL01:
1446
- case CHAR_LF:
1447
- if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1448
- {
1449
- active_count--; /* Remove non-match possibility */
1450
- next_active_state--;
1451
- }
1452
- count++;
1453
- ADD_NEW_DATA(-state_offset, count, ncount);
1454
- break;
1455
-
1456
- default:
1457
- break;
1458
- }
1459
- }
1460
- break;
1461
-
1462
- /*-----------------------------------------------------------------*/
1463
- case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1464
- case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1465
- case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1466
- count = current_state->count; /* Already matched */
1467
- if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1468
- if (clen > 0)
1469
- {
1470
- BOOL OK;
1471
- switch (c)
1472
- {
1473
- VSPACE_CASES:
1474
- OK = TRUE;
1475
- break;
1476
-
1477
- default:
1478
- OK = FALSE;
1479
- break;
1480
- }
1481
-
1482
- if (OK == (d == OP_VSPACE))
1483
- {
1484
- if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1485
- {
1486
- active_count--; /* Remove non-match possibility */
1487
- next_active_state--;
1488
- }
1489
- count++;
1490
- ADD_NEW_DATA(-state_offset, count, 0);
1491
- }
1492
- }
1493
- break;
1494
-
1495
- /*-----------------------------------------------------------------*/
1496
- case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1497
- case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1498
- case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1499
- count = current_state->count; /* Already matched */
1500
- if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1501
- if (clen > 0)
1502
- {
1503
- BOOL OK;
1504
- switch (c)
1505
- {
1506
- HSPACE_CASES:
1507
- OK = TRUE;
1508
- break;
1509
-
1510
- default:
1511
- OK = FALSE;
1512
- break;
1513
- }
1514
-
1515
- if (OK == (d == OP_HSPACE))
1516
- {
1517
- if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1518
- {
1519
- active_count--; /* Remove non-match possibility */
1520
- next_active_state--;
1521
- }
1522
- count++;
1523
- ADD_NEW_DATA(-state_offset, count, 0);
1524
- }
1525
- }
1526
- break;
1527
-
1528
- /*-----------------------------------------------------------------*/
1529
- #ifdef SUPPORT_UCP
1530
- case OP_PROP_EXTRA + OP_TYPEQUERY:
1531
- case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1532
- case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1533
- count = 4;
1534
- goto QS1;
1535
-
1536
- case OP_PROP_EXTRA + OP_TYPESTAR:
1537
- case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1538
- case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1539
- count = 0;
1540
-
1541
- QS1:
1542
-
1543
- ADD_ACTIVE(state_offset + 4, 0);
1544
- if (clen > 0)
1545
- {
1546
- BOOL OK;
1547
- const pcre_uint32 *cp;
1548
- const ucd_record * prop = GET_UCD(c);
1549
- switch(code[2])
1550
- {
1551
- case PT_ANY:
1552
- OK = TRUE;
1553
- break;
1554
-
1555
- case PT_LAMP:
1556
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1557
- prop->chartype == ucp_Lt;
1558
- break;
1559
-
1560
- case PT_GC:
1561
- OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1562
- break;
1563
-
1564
- case PT_PC:
1565
- OK = prop->chartype == code[3];
1566
- break;
1567
-
1568
- case PT_SC:
1569
- OK = prop->script == code[3];
1570
- break;
1571
-
1572
- /* These are specials for combination cases. */
1573
-
1574
- case PT_ALNUM:
1575
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1576
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1577
- break;
1578
-
1579
- case PT_SPACE: /* Perl space */
1580
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1581
- c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1582
- break;
1583
-
1584
- case PT_PXSPACE: /* POSIX space */
1585
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1586
- c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1587
- c == CHAR_FF || c == CHAR_CR;
1588
- break;
1589
-
1590
- case PT_WORD:
1591
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1592
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1593
- c == CHAR_UNDERSCORE;
1594
- break;
1595
-
1596
- case PT_CLIST:
1597
- cp = PRIV(ucd_caseless_sets) + code[3];
1598
- for (;;)
1599
- {
1600
- if (c < *cp) { OK = FALSE; break; }
1601
- if (c == *cp++) { OK = TRUE; break; }
1602
- }
1603
- break;
1604
-
1605
- /* Should never occur, but keep compilers from grumbling. */
1606
-
1607
- default:
1608
- OK = codevalue != OP_PROP;
1609
- break;
1610
- }
1611
-
1612
- if (OK == (d == OP_PROP))
1613
- {
1614
- if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1615
- codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1616
- {
1617
- active_count--; /* Remove non-match possibility */
1618
- next_active_state--;
1619
- }
1620
- ADD_NEW(state_offset + count, 0);
1621
- }
1622
- }
1623
- break;
1624
-
1625
- /*-----------------------------------------------------------------*/
1626
- case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1627
- case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1628
- case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1629
- count = 2;
1630
- goto QS2;
1631
-
1632
- case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1633
- case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1634
- case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1635
- count = 0;
1636
-
1637
- QS2:
1638
-
1639
- ADD_ACTIVE(state_offset + 2, 0);
1640
- if (clen > 0)
1641
- {
1642
- int lgb, rgb;
1643
- const pcre_uchar *nptr = ptr + clen;
1644
- int ncount = 0;
1645
- if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1646
- codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1647
- {
1648
- active_count--; /* Remove non-match possibility */
1649
- next_active_state--;
1650
- }
1651
- lgb = UCD_GRAPHBREAK(c);
1652
- while (nptr < end_subject)
1653
- {
1654
- dlen = 1;
1655
- if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1656
- rgb = UCD_GRAPHBREAK(d);
1657
- if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1658
- ncount++;
1659
- lgb = rgb;
1660
- nptr += dlen;
1661
- }
1662
- ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1663
- }
1664
- break;
1665
- #endif
1666
-
1667
- /*-----------------------------------------------------------------*/
1668
- case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1669
- case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1670
- case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1671
- count = 2;
1672
- goto QS3;
1673
-
1674
- case OP_ANYNL_EXTRA + OP_TYPESTAR:
1675
- case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1676
- case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1677
- count = 0;
1678
-
1679
- QS3:
1680
- ADD_ACTIVE(state_offset + 2, 0);
1681
- if (clen > 0)
1682
- {
1683
- int ncount = 0;
1684
- switch (c)
1685
- {
1686
- case CHAR_VT:
1687
- case CHAR_FF:
1688
- case CHAR_NEL:
1689
- #ifndef EBCDIC
1690
- case 0x2028:
1691
- case 0x2029:
1692
- #endif /* Not EBCDIC */
1693
- if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1694
- goto ANYNL02;
1695
-
1696
- case CHAR_CR:
1697
- if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1698
- /* Fall through */
1699
-
1700
- ANYNL02:
1701
- case CHAR_LF:
1702
- if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1703
- codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1704
- {
1705
- active_count--; /* Remove non-match possibility */
1706
- next_active_state--;
1707
- }
1708
- ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1709
- break;
1710
-
1711
- default:
1712
- break;
1713
- }
1714
- }
1715
- break;
1716
-
1717
- /*-----------------------------------------------------------------*/
1718
- case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1719
- case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1720
- case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1721
- count = 2;
1722
- goto QS4;
1723
-
1724
- case OP_VSPACE_EXTRA + OP_TYPESTAR:
1725
- case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1726
- case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1727
- count = 0;
1728
-
1729
- QS4:
1730
- ADD_ACTIVE(state_offset + 2, 0);
1731
- if (clen > 0)
1732
- {
1733
- BOOL OK;
1734
- switch (c)
1735
- {
1736
- VSPACE_CASES:
1737
- OK = TRUE;
1738
- break;
1739
-
1740
- default:
1741
- OK = FALSE;
1742
- break;
1743
- }
1744
- if (OK == (d == OP_VSPACE))
1745
- {
1746
- if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1747
- codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1748
- {
1749
- active_count--; /* Remove non-match possibility */
1750
- next_active_state--;
1751
- }
1752
- ADD_NEW_DATA(-(state_offset + count), 0, 0);
1753
- }
1754
- }
1755
- break;
1756
-
1757
- /*-----------------------------------------------------------------*/
1758
- case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1759
- case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1760
- case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1761
- count = 2;
1762
- goto QS5;
1763
-
1764
- case OP_HSPACE_EXTRA + OP_TYPESTAR:
1765
- case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1766
- case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1767
- count = 0;
1768
-
1769
- QS5:
1770
- ADD_ACTIVE(state_offset + 2, 0);
1771
- if (clen > 0)
1772
- {
1773
- BOOL OK;
1774
- switch (c)
1775
- {
1776
- HSPACE_CASES:
1777
- OK = TRUE;
1778
- break;
1779
-
1780
- default:
1781
- OK = FALSE;
1782
- break;
1783
- }
1784
-
1785
- if (OK == (d == OP_HSPACE))
1786
- {
1787
- if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1788
- codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1789
- {
1790
- active_count--; /* Remove non-match possibility */
1791
- next_active_state--;
1792
- }
1793
- ADD_NEW_DATA(-(state_offset + count), 0, 0);
1794
- }
1795
- }
1796
- break;
1797
-
1798
- /*-----------------------------------------------------------------*/
1799
- #ifdef SUPPORT_UCP
1800
- case OP_PROP_EXTRA + OP_TYPEEXACT:
1801
- case OP_PROP_EXTRA + OP_TYPEUPTO:
1802
- case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1803
- case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1804
- if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1805
- { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
1806
- count = current_state->count; /* Number already matched */
1807
- if (clen > 0)
1808
- {
1809
- BOOL OK;
1810
- const pcre_uint32 *cp;
1811
- const ucd_record * prop = GET_UCD(c);
1812
- switch(code[1 + IMM2_SIZE + 1])
1813
- {
1814
- case PT_ANY:
1815
- OK = TRUE;
1816
- break;
1817
-
1818
- case PT_LAMP:
1819
- OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1820
- prop->chartype == ucp_Lt;
1821
- break;
1822
-
1823
- case PT_GC:
1824
- OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
1825
- break;
1826
-
1827
- case PT_PC:
1828
- OK = prop->chartype == code[1 + IMM2_SIZE + 2];
1829
- break;
1830
-
1831
- case PT_SC:
1832
- OK = prop->script == code[1 + IMM2_SIZE + 2];
1833
- break;
1834
-
1835
- /* These are specials for combination cases. */
1836
-
1837
- case PT_ALNUM:
1838
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1839
- PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1840
- break;
1841
-
1842
- case PT_SPACE: /* Perl space */
1843
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1844
- c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1845
- break;
1846
-
1847
- case PT_PXSPACE: /* POSIX space */
1848
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1849
- c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1850
- c == CHAR_FF || c == CHAR_CR;
1851
- break;
1852
-
1853
- case PT_WORD:
1854
- OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1855
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1856
- c == CHAR_UNDERSCORE;
1857
- break;
1858
-
1859
- case PT_CLIST:
1860
- cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
1861
- for (;;)
1862
- {
1863
- if (c < *cp) { OK = FALSE; break; }
1864
- if (c == *cp++) { OK = TRUE; break; }
1865
- }
1866
- break;
1867
-
1868
- /* Should never occur, but keep compilers from grumbling. */
1869
-
1870
- default:
1871
- OK = codevalue != OP_PROP;
1872
- break;
1873
- }
1874
-
1875
- if (OK == (d == OP_PROP))
1876
- {
1877
- if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1878
- {
1879
- active_count--; /* Remove non-match possibility */
1880
- next_active_state--;
1881
- }
1882
- if (++count >= GET2(code, 1))
1883
- { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1884
- else
1885
- { ADD_NEW(state_offset, count); }
1886
- }
1887
- }
1888
- break;
1889
-
1890
- /*-----------------------------------------------------------------*/
1891
- case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1892
- case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1893
- case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1894
- case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1895
- if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1896
- { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1897
- count = current_state->count; /* Number already matched */
1898
- if (clen > 0)
1899
- {
1900
- int lgb, rgb;
1901
- const pcre_uchar *nptr = ptr + clen;
1902
- int ncount = 0;
1903
- if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1904
- {
1905
- active_count--; /* Remove non-match possibility */
1906
- next_active_state--;
1907
- }
1908
- lgb = UCD_GRAPHBREAK(c);
1909
- while (nptr < end_subject)
1910
- {
1911
- dlen = 1;
1912
- if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1913
- rgb = UCD_GRAPHBREAK(d);
1914
- if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1915
- ncount++;
1916
- lgb = rgb;
1917
- nptr += dlen;
1918
- }
1919
- if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1920
- reset_could_continue = TRUE;
1921
- if (++count >= GET2(code, 1))
1922
- { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1923
- else
1924
- { ADD_NEW_DATA(-state_offset, count, ncount); }
1925
- }
1926
- break;
1927
- #endif
1928
-
1929
- /*-----------------------------------------------------------------*/
1930
- case OP_ANYNL_EXTRA + OP_TYPEEXACT:
1931
- case OP_ANYNL_EXTRA + OP_TYPEUPTO:
1932
- case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1933
- case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1934
- if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1935
- { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1936
- count = current_state->count; /* Number already matched */
1937
- if (clen > 0)
1938
- {
1939
- int ncount = 0;
1940
- switch (c)
1941
- {
1942
- case CHAR_VT:
1943
- case CHAR_FF:
1944
- case CHAR_NEL:
1945
- #ifndef EBCDIC
1946
- case 0x2028:
1947
- case 0x2029:
1948
- #endif /* Not EBCDIC */
1949
- if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1950
- goto ANYNL03;
1951
-
1952
- case CHAR_CR:
1953
- if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1954
- /* Fall through */
1955
-
1956
- ANYNL03:
1957
- case CHAR_LF:
1958
- if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1959
- {
1960
- active_count--; /* Remove non-match possibility */
1961
- next_active_state--;
1962
- }
1963
- if (++count >= GET2(code, 1))
1964
- { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1965
- else
1966
- { ADD_NEW_DATA(-state_offset, count, ncount); }
1967
- break;
1968
-
1969
- default:
1970
- break;
1971
- }
1972
- }
1973
- break;
1974
-
1975
- /*-----------------------------------------------------------------*/
1976
- case OP_VSPACE_EXTRA + OP_TYPEEXACT:
1977
- case OP_VSPACE_EXTRA + OP_TYPEUPTO:
1978
- case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1979
- case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1980
- if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1981
- { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1982
- count = current_state->count; /* Number already matched */
1983
- if (clen > 0)
1984
- {
1985
- BOOL OK;
1986
- switch (c)
1987
- {
1988
- VSPACE_CASES:
1989
- OK = TRUE;
1990
- break;
1991
-
1992
- default:
1993
- OK = FALSE;
1994
- }
1995
-
1996
- if (OK == (d == OP_VSPACE))
1997
- {
1998
- if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
1999
- {
2000
- active_count--; /* Remove non-match possibility */
2001
- next_active_state--;
2002
- }
2003
- if (++count >= GET2(code, 1))
2004
- { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2005
- else
2006
- { ADD_NEW_DATA(-state_offset, count, 0); }
2007
- }
2008
- }
2009
- break;
2010
-
2011
- /*-----------------------------------------------------------------*/
2012
- case OP_HSPACE_EXTRA + OP_TYPEEXACT:
2013
- case OP_HSPACE_EXTRA + OP_TYPEUPTO:
2014
- case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
2015
- case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
2016
- if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
2017
- { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2018
- count = current_state->count; /* Number already matched */
2019
- if (clen > 0)
2020
- {
2021
- BOOL OK;
2022
- switch (c)
2023
- {
2024
- HSPACE_CASES:
2025
- OK = TRUE;
2026
- break;
2027
-
2028
- default:
2029
- OK = FALSE;
2030
- break;
2031
- }
2032
-
2033
- if (OK == (d == OP_HSPACE))
2034
- {
2035
- if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
2036
- {
2037
- active_count--; /* Remove non-match possibility */
2038
- next_active_state--;
2039
- }
2040
- if (++count >= GET2(code, 1))
2041
- { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2042
- else
2043
- { ADD_NEW_DATA(-state_offset, count, 0); }
2044
- }
2045
- }
2046
- break;
2047
-
2048
- /* ========================================================================== */
2049
- /* These opcodes are followed by a character that is usually compared
2050
- to the current subject character; it is loaded into d. We still get
2051
- here even if there is no subject character, because in some cases zero
2052
- repetitions are permitted. */
2053
-
2054
- /*-----------------------------------------------------------------*/
2055
- case OP_CHAR:
2056
- if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
2057
- break;
2058
-
2059
- /*-----------------------------------------------------------------*/
2060
- case OP_CHARI:
2061
- if (clen == 0) break;
2062
-
2063
- #ifdef SUPPORT_UTF
2064
- if (utf)
2065
- {
2066
- if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
2067
- {
2068
- unsigned int othercase;
2069
- if (c < 128)
2070
- othercase = fcc[c];
2071
- else
2072
- /* If we have Unicode property support, we can use it to test the
2073
- other case of the character. */
2074
- #ifdef SUPPORT_UCP
2075
- othercase = UCD_OTHERCASE(c);
2076
- #else
2077
- othercase = NOTACHAR;
2078
- #endif
2079
-
2080
- if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
2081
- }
2082
- }
2083
- else
2084
- #endif /* SUPPORT_UTF */
2085
- /* Not UTF mode */
2086
- {
2087
- if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
2088
- { ADD_NEW(state_offset + 2, 0); }
2089
- }
2090
- break;
2091
-
2092
-
2093
- #ifdef SUPPORT_UCP
2094
- /*-----------------------------------------------------------------*/
2095
- /* This is a tricky one because it can match more than one character.
2096
- Find out how many characters to skip, and then set up a negative state
2097
- to wait for them to pass before continuing. */
2098
-
2099
- case OP_EXTUNI:
2100
- if (clen > 0)
2101
- {
2102
- int lgb, rgb;
2103
- const pcre_uchar *nptr = ptr + clen;
2104
- int ncount = 0;
2105
- lgb = UCD_GRAPHBREAK(c);
2106
- while (nptr < end_subject)
2107
- {
2108
- dlen = 1;
2109
- if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
2110
- rgb = UCD_GRAPHBREAK(d);
2111
- if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2112
- ncount++;
2113
- lgb = rgb;
2114
- nptr += dlen;
2115
- }
2116
- if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2117
- reset_could_continue = TRUE;
2118
- ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
2119
- }
2120
- break;
2121
- #endif
2122
-
2123
- /*-----------------------------------------------------------------*/
2124
- /* This is tricky like EXTUNI because it too can match more than one
2125
- character (when CR is followed by LF). In this case, set up a negative
2126
- state to wait for one character to pass before continuing. */
2127
-
2128
- case OP_ANYNL:
2129
- if (clen > 0) switch(c)
2130
- {
2131
- case CHAR_VT:
2132
- case CHAR_FF:
2133
- case CHAR_NEL:
2134
- #ifndef EBCDIC
2135
- case 0x2028:
2136
- case 0x2029:
2137
- #endif /* Not EBCDIC */
2138
- if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
2139
-
2140
- case CHAR_LF:
2141
- ADD_NEW(state_offset + 1, 0);
2142
- break;
2143
-
2144
- case CHAR_CR:
2145
- if (ptr + 1 >= end_subject)
2146
- {
2147
- ADD_NEW(state_offset + 1, 0);
2148
- if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2149
- reset_could_continue = TRUE;
2150
- }
2151
- else if (RAWUCHARTEST(ptr + 1) == CHAR_LF)
2152
- {
2153
- ADD_NEW_DATA(-(state_offset + 1), 0, 1);
2154
- }
2155
- else
2156
- {
2157
- ADD_NEW(state_offset + 1, 0);
2158
- }
2159
- break;
2160
- }
2161
- break;
2162
-
2163
- /*-----------------------------------------------------------------*/
2164
- case OP_NOT_VSPACE:
2165
- if (clen > 0) switch(c)
2166
- {
2167
- VSPACE_CASES:
2168
- break;
2169
-
2170
- default:
2171
- ADD_NEW(state_offset + 1, 0);
2172
- break;
2173
- }
2174
- break;
2175
-
2176
- /*-----------------------------------------------------------------*/
2177
- case OP_VSPACE:
2178
- if (clen > 0) switch(c)
2179
- {
2180
- VSPACE_CASES:
2181
- ADD_NEW(state_offset + 1, 0);
2182
- break;
2183
-
2184
- default:
2185
- break;
2186
- }
2187
- break;
2188
-
2189
- /*-----------------------------------------------------------------*/
2190
- case OP_NOT_HSPACE:
2191
- if (clen > 0) switch(c)
2192
- {
2193
- HSPACE_CASES:
2194
- break;
2195
-
2196
- default:
2197
- ADD_NEW(state_offset + 1, 0);
2198
- break;
2199
- }
2200
- break;
2201
-
2202
- /*-----------------------------------------------------------------*/
2203
- case OP_HSPACE:
2204
- if (clen > 0) switch(c)
2205
- {
2206
- HSPACE_CASES:
2207
- ADD_NEW(state_offset + 1, 0);
2208
- break;
2209
-
2210
- default:
2211
- break;
2212
- }
2213
- break;
2214
-
2215
- /*-----------------------------------------------------------------*/
2216
- /* Match a negated single character casefully. */
2217
-
2218
- case OP_NOT:
2219
- if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
2220
- break;
2221
-
2222
- /*-----------------------------------------------------------------*/
2223
- /* Match a negated single character caselessly. */
2224
-
2225
- case OP_NOTI:
2226
- if (clen > 0)
2227
- {
2228
- unsigned int otherd;
2229
- #ifdef SUPPORT_UTF
2230
- if (utf && d >= 128)
2231
- {
2232
- #ifdef SUPPORT_UCP
2233
- otherd = UCD_OTHERCASE(d);
2234
- #endif /* SUPPORT_UCP */
2235
- }
2236
- else
2237
- #endif /* SUPPORT_UTF */
2238
- otherd = TABLE_GET(d, fcc, d);
2239
- if (c != d && c != otherd)
2240
- { ADD_NEW(state_offset + dlen + 1, 0); }
2241
- }
2242
- break;
2243
-
2244
- /*-----------------------------------------------------------------*/
2245
- case OP_PLUSI:
2246
- case OP_MINPLUSI:
2247
- case OP_POSPLUSI:
2248
- case OP_NOTPLUSI:
2249
- case OP_NOTMINPLUSI:
2250
- case OP_NOTPOSPLUSI:
2251
- caseless = TRUE;
2252
- codevalue -= OP_STARI - OP_STAR;
2253
-
2254
- /* Fall through */
2255
- case OP_PLUS:
2256
- case OP_MINPLUS:
2257
- case OP_POSPLUS:
2258
- case OP_NOTPLUS:
2259
- case OP_NOTMINPLUS:
2260
- case OP_NOTPOSPLUS:
2261
- count = current_state->count; /* Already matched */
2262
- if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
2263
- if (clen > 0)
2264
- {
2265
- pcre_uint32 otherd = NOTACHAR;
2266
- if (caseless)
2267
- {
2268
- #ifdef SUPPORT_UTF
2269
- if (utf && d >= 128)
2270
- {
2271
- #ifdef SUPPORT_UCP
2272
- otherd = UCD_OTHERCASE(d);
2273
- #endif /* SUPPORT_UCP */
2274
- }
2275
- else
2276
- #endif /* SUPPORT_UTF */
2277
- otherd = TABLE_GET(d, fcc, d);
2278
- }
2279
- if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2280
- {
2281
- if (count > 0 &&
2282
- (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
2283
- {
2284
- active_count--; /* Remove non-match possibility */
2285
- next_active_state--;
2286
- }
2287
- count++;
2288
- ADD_NEW(state_offset, count);
2289
- }
2290
- }
2291
- break;
2292
-
2293
- /*-----------------------------------------------------------------*/
2294
- case OP_QUERYI:
2295
- case OP_MINQUERYI:
2296
- case OP_POSQUERYI:
2297
- case OP_NOTQUERYI:
2298
- case OP_NOTMINQUERYI:
2299
- case OP_NOTPOSQUERYI:
2300
- caseless = TRUE;
2301
- codevalue -= OP_STARI - OP_STAR;
2302
- /* Fall through */
2303
- case OP_QUERY:
2304
- case OP_MINQUERY:
2305
- case OP_POSQUERY:
2306
- case OP_NOTQUERY:
2307
- case OP_NOTMINQUERY:
2308
- case OP_NOTPOSQUERY:
2309
- ADD_ACTIVE(state_offset + dlen + 1, 0);
2310
- if (clen > 0)
2311
- {
2312
- pcre_uint32 otherd = NOTACHAR;
2313
- if (caseless)
2314
- {
2315
- #ifdef SUPPORT_UTF
2316
- if (utf && d >= 128)
2317
- {
2318
- #ifdef SUPPORT_UCP
2319
- otherd = UCD_OTHERCASE(d);
2320
- #endif /* SUPPORT_UCP */
2321
- }
2322
- else
2323
- #endif /* SUPPORT_UTF */
2324
- otherd = TABLE_GET(d, fcc, d);
2325
- }
2326
- if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2327
- {
2328
- if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
2329
- {
2330
- active_count--; /* Remove non-match possibility */
2331
- next_active_state--;
2332
- }
2333
- ADD_NEW(state_offset + dlen + 1, 0);
2334
- }
2335
- }
2336
- break;
2337
-
2338
- /*-----------------------------------------------------------------*/
2339
- case OP_STARI:
2340
- case OP_MINSTARI:
2341
- case OP_POSSTARI:
2342
- case OP_NOTSTARI:
2343
- case OP_NOTMINSTARI:
2344
- case OP_NOTPOSSTARI:
2345
- caseless = TRUE;
2346
- codevalue -= OP_STARI - OP_STAR;
2347
- /* Fall through */
2348
- case OP_STAR:
2349
- case OP_MINSTAR:
2350
- case OP_POSSTAR:
2351
- case OP_NOTSTAR:
2352
- case OP_NOTMINSTAR:
2353
- case OP_NOTPOSSTAR:
2354
- ADD_ACTIVE(state_offset + dlen + 1, 0);
2355
- if (clen > 0)
2356
- {
2357
- pcre_uint32 otherd = NOTACHAR;
2358
- if (caseless)
2359
- {
2360
- #ifdef SUPPORT_UTF
2361
- if (utf && d >= 128)
2362
- {
2363
- #ifdef SUPPORT_UCP
2364
- otherd = UCD_OTHERCASE(d);
2365
- #endif /* SUPPORT_UCP */
2366
- }
2367
- else
2368
- #endif /* SUPPORT_UTF */
2369
- otherd = TABLE_GET(d, fcc, d);
2370
- }
2371
- if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2372
- {
2373
- if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
2374
- {
2375
- active_count--; /* Remove non-match possibility */
2376
- next_active_state--;
2377
- }
2378
- ADD_NEW(state_offset, 0);
2379
- }
2380
- }
2381
- break;
2382
-
2383
- /*-----------------------------------------------------------------*/
2384
- case OP_EXACTI:
2385
- case OP_NOTEXACTI:
2386
- caseless = TRUE;
2387
- codevalue -= OP_STARI - OP_STAR;
2388
- /* Fall through */
2389
- case OP_EXACT:
2390
- case OP_NOTEXACT:
2391
- count = current_state->count; /* Number already matched */
2392
- if (clen > 0)
2393
- {
2394
- pcre_uint32 otherd = NOTACHAR;
2395
- if (caseless)
2396
- {
2397
- #ifdef SUPPORT_UTF
2398
- if (utf && d >= 128)
2399
- {
2400
- #ifdef SUPPORT_UCP
2401
- otherd = UCD_OTHERCASE(d);
2402
- #endif /* SUPPORT_UCP */
2403
- }
2404
- else
2405
- #endif /* SUPPORT_UTF */
2406
- otherd = TABLE_GET(d, fcc, d);
2407
- }
2408
- if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2409
- {
2410
- if (++count >= GET2(code, 1))
2411
- { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2412
- else
2413
- { ADD_NEW(state_offset, count); }
2414
- }
2415
- }
2416
- break;
2417
-
2418
- /*-----------------------------------------------------------------*/
2419
- case OP_UPTOI:
2420
- case OP_MINUPTOI:
2421
- case OP_POSUPTOI:
2422
- case OP_NOTUPTOI:
2423
- case OP_NOTMINUPTOI:
2424
- case OP_NOTPOSUPTOI:
2425
- caseless = TRUE;
2426
- codevalue -= OP_STARI - OP_STAR;
2427
- /* Fall through */
2428
- case OP_UPTO:
2429
- case OP_MINUPTO:
2430
- case OP_POSUPTO:
2431
- case OP_NOTUPTO:
2432
- case OP_NOTMINUPTO:
2433
- case OP_NOTPOSUPTO:
2434
- ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
2435
- count = current_state->count; /* Number already matched */
2436
- if (clen > 0)
2437
- {
2438
- pcre_uint32 otherd = NOTACHAR;
2439
- if (caseless)
2440
- {
2441
- #ifdef SUPPORT_UTF
2442
- if (utf && d >= 128)
2443
- {
2444
- #ifdef SUPPORT_UCP
2445
- otherd = UCD_OTHERCASE(d);
2446
- #endif /* SUPPORT_UCP */
2447
- }
2448
- else
2449
- #endif /* SUPPORT_UTF */
2450
- otherd = TABLE_GET(d, fcc, d);
2451
- }
2452
- if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2453
- {
2454
- if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
2455
- {
2456
- active_count--; /* Remove non-match possibility */
2457
- next_active_state--;
2458
- }
2459
- if (++count >= GET2(code, 1))
2460
- { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2461
- else
2462
- { ADD_NEW(state_offset, count); }
2463
- }
2464
- }
2465
- break;
2466
-
2467
-
2468
- /* ========================================================================== */
2469
- /* These are the class-handling opcodes */
2470
-
2471
- case OP_CLASS:
2472
- case OP_NCLASS:
2473
- case OP_XCLASS:
2474
- {
2475
- BOOL isinclass = FALSE;
2476
- int next_state_offset;
2477
- const pcre_uchar *ecode;
2478
-
2479
- /* For a simple class, there is always just a 32-byte table, and we
2480
- can set isinclass from it. */
2481
-
2482
- if (codevalue != OP_XCLASS)
2483
- {
2484
- ecode = code + 1 + (32 / sizeof(pcre_uchar));
2485
- if (clen > 0)
2486
- {
2487
- isinclass = (c > 255)? (codevalue == OP_NCLASS) :
2488
- ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
2489
- }
2490
- }
2491
-
2492
- /* An extended class may have a table or a list of single characters,
2493
- ranges, or both, and it may be positive or negative. There's a
2494
- function that sorts all this out. */
2495
-
2496
- else
2497
- {
2498
- ecode = code + GET(code, 1);
2499
- if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
2500
- }
2501
-
2502
- /* At this point, isinclass is set for all kinds of class, and ecode
2503
- points to the byte after the end of the class. If there is a
2504
- quantifier, this is where it will be. */
2505
-
2506
- next_state_offset = (int)(ecode - start_code);
2507
-
2508
- switch (*ecode)
2509
- {
2510
- case OP_CRSTAR:
2511
- case OP_CRMINSTAR:
2512
- ADD_ACTIVE(next_state_offset + 1, 0);
2513
- if (isinclass) { ADD_NEW(state_offset, 0); }
2514
- break;
2515
-
2516
- case OP_CRPLUS:
2517
- case OP_CRMINPLUS:
2518
- count = current_state->count; /* Already matched */
2519
- if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
2520
- if (isinclass) { count++; ADD_NEW(state_offset, count); }
2521
- break;
2522
-
2523
- case OP_CRQUERY:
2524
- case OP_CRMINQUERY:
2525
- ADD_ACTIVE(next_state_offset + 1, 0);
2526
- if (isinclass) { ADD_NEW(next_state_offset + 1, 0); }
2527
- break;
2528
-
2529
- case OP_CRRANGE:
2530
- case OP_CRMINRANGE:
2531
- count = current_state->count; /* Already matched */
2532
- if (count >= GET2(ecode, 1))
2533
- { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2534
- if (isinclass)
2535
- {
2536
- unsigned int max = GET2(ecode, 1 + IMM2_SIZE);
2537
- if (++count >= max && max != 0) /* Max 0 => no limit */
2538
- { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2539
- else
2540
- { ADD_NEW(state_offset, count); }
2541
- }
2542
- break;
2543
-
2544
- default:
2545
- if (isinclass) { ADD_NEW(next_state_offset, 0); }
2546
- break;
2547
- }
2548
- }
2549
- break;
2550
-
2551
- /* ========================================================================== */
2552
- /* These are the opcodes for fancy brackets of various kinds. We have
2553
- to use recursion in order to handle them. The "always failing" assertion
2554
- (?!) is optimised to OP_FAIL when compiling, so we have to support that,
2555
- though the other "backtracking verbs" are not supported. */
2556
-
2557
- case OP_FAIL:
2558
- forced_fail++; /* Count FAILs for multiple states */
2559
- break;
2560
-
2561
- case OP_ASSERT:
2562
- case OP_ASSERT_NOT:
2563
- case OP_ASSERTBACK:
2564
- case OP_ASSERTBACK_NOT:
2565
- {
2566
- int rc;
2567
- int local_offsets[2];
2568
- int local_workspace[1000];
2569
- const pcre_uchar *endasscode = code + GET(code, 1);
2570
-
2571
- while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2572
-
2573
- rc = internal_dfa_exec(
2574
- md, /* static match data */
2575
- code, /* this subexpression's code */
2576
- ptr, /* where we currently are */
2577
- (int)(ptr - start_subject), /* start offset */
2578
- local_offsets, /* offset vector */
2579
- sizeof(local_offsets)/sizeof(int), /* size of same */
2580
- local_workspace, /* workspace vector */
2581
- sizeof(local_workspace)/sizeof(int), /* size of same */
2582
- rlevel); /* function recursion level */
2583
-
2584
- if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2585
- if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
2586
- { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
2587
- }
2588
- break;
2589
-
2590
- /*-----------------------------------------------------------------*/
2591
- case OP_COND:
2592
- case OP_SCOND:
2593
- {
2594
- int local_offsets[1000];
2595
- int local_workspace[1000];
2596
- int codelink = GET(code, 1);
2597
- int condcode;
2598
-
2599
- /* Because of the way auto-callout works during compile, a callout item
2600
- is inserted between OP_COND and an assertion condition. This does not
2601
- happen for the other conditions. */
2602
-
2603
- if (code[LINK_SIZE+1] == OP_CALLOUT)
2604
- {
2605
- rrc = 0;
2606
- if (PUBL(callout) != NULL)
2607
- {
2608
- PUBL(callout_block) cb;
2609
- cb.version = 1; /* Version 1 of the callout block */
2610
- cb.callout_number = code[LINK_SIZE+2];
2611
- cb.offset_vector = offsets;
2612
- #if defined COMPILE_PCRE8
2613
- cb.subject = (PCRE_SPTR)start_subject;
2614
- #elif defined COMPILE_PCRE16
2615
- cb.subject = (PCRE_SPTR16)start_subject;
2616
- #elif defined COMPILE_PCRE32
2617
- cb.subject = (PCRE_SPTR32)start_subject;
2618
- #endif
2619
- cb.subject_length = (int)(end_subject - start_subject);
2620
- cb.start_match = (int)(current_subject - start_subject);
2621
- cb.current_position = (int)(ptr - start_subject);
2622
- cb.pattern_position = GET(code, LINK_SIZE + 3);
2623
- cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
2624
- cb.capture_top = 1;
2625
- cb.capture_last = -1;
2626
- cb.callout_data = md->callout_data;
2627
- cb.mark = NULL; /* No (*MARK) support */
2628
- if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */
2629
- }
2630
- if (rrc > 0) break; /* Fail this thread */
2631
- code += PRIV(OP_lengths)[OP_CALLOUT]; /* Skip callout data */
2632
- }
2633
-
2634
- condcode = code[LINK_SIZE+1];
2635
-
2636
- /* Back reference conditions are not supported */
2637
-
2638
- if (condcode == OP_CREF || condcode == OP_NCREF)
2639
- return PCRE_ERROR_DFA_UCOND;
2640
-
2641
- /* The DEFINE condition is always false */
2642
-
2643
- if (condcode == OP_DEF)
2644
- { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2645
-
2646
- /* The only supported version of OP_RREF is for the value RREF_ANY,
2647
- which means "test if in any recursion". We can't test for specifically
2648
- recursed groups. */
2649
-
2650
- else if (condcode == OP_RREF || condcode == OP_NRREF)
2651
- {
2652
- int value = GET2(code, LINK_SIZE + 2);
2653
- if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2654
- if (md->recursive != NULL)
2655
- { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
2656
- else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2657
- }
2658
-
2659
- /* Otherwise, the condition is an assertion */
2660
-
2661
- else
2662
- {
2663
- int rc;
2664
- const pcre_uchar *asscode = code + LINK_SIZE + 1;
2665
- const pcre_uchar *endasscode = asscode + GET(asscode, 1);
2666
-
2667
- while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2668
-
2669
- rc = internal_dfa_exec(
2670
- md, /* fixed match data */
2671
- asscode, /* this subexpression's code */
2672
- ptr, /* where we currently are */
2673
- (int)(ptr - start_subject), /* start offset */
2674
- local_offsets, /* offset vector */
2675
- sizeof(local_offsets)/sizeof(int), /* size of same */
2676
- local_workspace, /* workspace vector */
2677
- sizeof(local_workspace)/sizeof(int), /* size of same */
2678
- rlevel); /* function recursion level */
2679
-
2680
- if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2681
- if ((rc >= 0) ==
2682
- (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
2683
- { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
2684
- else
2685
- { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2686
- }
2687
- }
2688
- break;
2689
-
2690
- /*-----------------------------------------------------------------*/
2691
- case OP_RECURSE:
2692
- {
2693
- dfa_recursion_info *ri;
2694
- int local_offsets[1000];
2695
- int local_workspace[1000];
2696
- const pcre_uchar *callpat = start_code + GET(code, 1);
2697
- int recno = (callpat == md->start_code)? 0 :
2698
- GET2(callpat, 1 + LINK_SIZE);
2699
- int rc;
2700
-
2701
- DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
2702
-
2703
- /* Check for repeating a recursion without advancing the subject
2704
- pointer. This should catch convoluted mutual recursions. (Some simple
2705
- cases are caught at compile time.) */
2706
-
2707
- for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
2708
- if (recno == ri->group_num && ptr == ri->subject_position)
2709
- return PCRE_ERROR_RECURSELOOP;
2710
-
2711
- /* Remember this recursion and where we started it so as to
2712
- catch infinite loops. */
2713
-
2714
- new_recursive.group_num = recno;
2715
- new_recursive.subject_position = ptr;
2716
- new_recursive.prevrec = md->recursive;
2717
- md->recursive = &new_recursive;
2718
-
2719
- rc = internal_dfa_exec(
2720
- md, /* fixed match data */
2721
- callpat, /* this subexpression's code */
2722
- ptr, /* where we currently are */
2723
- (int)(ptr - start_subject), /* start offset */
2724
- local_offsets, /* offset vector */
2725
- sizeof(local_offsets)/sizeof(int), /* size of same */
2726
- local_workspace, /* workspace vector */
2727
- sizeof(local_workspace)/sizeof(int), /* size of same */
2728
- rlevel); /* function recursion level */
2729
-
2730
- md->recursive = new_recursive.prevrec; /* Done this recursion */
2731
-
2732
- DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
2733
- rc));
2734
-
2735
- /* Ran out of internal offsets */
2736
-
2737
- if (rc == 0) return PCRE_ERROR_DFA_RECURSE;
2738
-
2739
- /* For each successful matched substring, set up the next state with a
2740
- count of characters to skip before trying it. Note that the count is in
2741
- characters, not bytes. */
2742
-
2743
- if (rc > 0)
2744
- {
2745
- for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2746
- {
2747
- int charcount = local_offsets[rc+1] - local_offsets[rc];
2748
- #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2749
- if (utf)
2750
- {
2751
- const pcre_uchar *p = start_subject + local_offsets[rc];
2752
- const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2753
- while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2754
- }
2755
- #endif
2756
- if (charcount > 0)
2757
- {
2758
- ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
2759
- }
2760
- else
2761
- {
2762
- ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
2763
- }
2764
- }
2765
- }
2766
- else if (rc != PCRE_ERROR_NOMATCH) return rc;
2767
- }
2768
- break;
2769
-
2770
- /*-----------------------------------------------------------------*/
2771
- case OP_BRAPOS:
2772
- case OP_SBRAPOS:
2773
- case OP_CBRAPOS:
2774
- case OP_SCBRAPOS:
2775
- case OP_BRAPOSZERO:
2776
- {
2777
- int charcount, matched_count;
2778
- const pcre_uchar *local_ptr = ptr;
2779
- BOOL allow_zero;
2780
-
2781
- if (codevalue == OP_BRAPOSZERO)
2782
- {
2783
- allow_zero = TRUE;
2784
- codevalue = *(++code); /* Codevalue will be one of above BRAs */
2785
- }
2786
- else allow_zero = FALSE;
2787
-
2788
- /* Loop to match the subpattern as many times as possible as if it were
2789
- a complete pattern. */
2790
-
2791
- for (matched_count = 0;; matched_count++)
2792
- {
2793
- int local_offsets[2];
2794
- int local_workspace[1000];
2795
-
2796
- int rc = internal_dfa_exec(
2797
- md, /* fixed match data */
2798
- code, /* this subexpression's code */
2799
- local_ptr, /* where we currently are */
2800
- (int)(ptr - start_subject), /* start offset */
2801
- local_offsets, /* offset vector */
2802
- sizeof(local_offsets)/sizeof(int), /* size of same */
2803
- local_workspace, /* workspace vector */
2804
- sizeof(local_workspace)/sizeof(int), /* size of same */
2805
- rlevel); /* function recursion level */
2806
-
2807
- /* Failed to match */
2808
-
2809
- if (rc < 0)
2810
- {
2811
- if (rc != PCRE_ERROR_NOMATCH) return rc;
2812
- break;
2813
- }
2814
-
2815
- /* Matched: break the loop if zero characters matched. */
2816
-
2817
- charcount = local_offsets[1] - local_offsets[0];
2818
- if (charcount == 0) break;
2819
- local_ptr += charcount; /* Advance temporary position ptr */
2820
- }
2821
-
2822
- /* At this point we have matched the subpattern matched_count
2823
- times, and local_ptr is pointing to the character after the end of the
2824
- last match. */
2825
-
2826
- if (matched_count > 0 || allow_zero)
2827
- {
2828
- const pcre_uchar *end_subpattern = code;
2829
- int next_state_offset;
2830
-
2831
- do { end_subpattern += GET(end_subpattern, 1); }
2832
- while (*end_subpattern == OP_ALT);
2833
- next_state_offset =
2834
- (int)(end_subpattern - start_code + LINK_SIZE + 1);
2835
-
2836
- /* Optimization: if there are no more active states, and there
2837
- are no new states yet set up, then skip over the subject string
2838
- right here, to save looping. Otherwise, set up the new state to swing
2839
- into action when the end of the matched substring is reached. */
2840
-
2841
- if (i + 1 >= active_count && new_count == 0)
2842
- {
2843
- ptr = local_ptr;
2844
- clen = 0;
2845
- ADD_NEW(next_state_offset, 0);
2846
- }
2847
- else
2848
- {
2849
- const pcre_uchar *p = ptr;
2850
- const pcre_uchar *pp = local_ptr;
2851
- charcount = (int)(pp - p);
2852
- #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2853
- if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2854
- #endif
2855
- ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2856
- }
2857
- }
2858
- }
2859
- break;
2860
-
2861
- /*-----------------------------------------------------------------*/
2862
- case OP_ONCE:
2863
- case OP_ONCE_NC:
2864
- {
2865
- int local_offsets[2];
2866
- int local_workspace[1000];
2867
-
2868
- int rc = internal_dfa_exec(
2869
- md, /* fixed match data */
2870
- code, /* this subexpression's code */
2871
- ptr, /* where we currently are */
2872
- (int)(ptr - start_subject), /* start offset */
2873
- local_offsets, /* offset vector */
2874
- sizeof(local_offsets)/sizeof(int), /* size of same */
2875
- local_workspace, /* workspace vector */
2876
- sizeof(local_workspace)/sizeof(int), /* size of same */
2877
- rlevel); /* function recursion level */
2878
-
2879
- if (rc >= 0)
2880
- {
2881
- const pcre_uchar *end_subpattern = code;
2882
- int charcount = local_offsets[1] - local_offsets[0];
2883
- int next_state_offset, repeat_state_offset;
2884
-
2885
- do { end_subpattern += GET(end_subpattern, 1); }
2886
- while (*end_subpattern == OP_ALT);
2887
- next_state_offset =
2888
- (int)(end_subpattern - start_code + LINK_SIZE + 1);
2889
-
2890
- /* If the end of this subpattern is KETRMAX or KETRMIN, we must
2891
- arrange for the repeat state also to be added to the relevant list.
2892
- Calculate the offset, or set -1 for no repeat. */
2893
-
2894
- repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
2895
- *end_subpattern == OP_KETRMIN)?
2896
- (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
2897
-
2898
- /* If we have matched an empty string, add the next state at the
2899
- current character pointer. This is important so that the duplicate
2900
- checking kicks in, which is what breaks infinite loops that match an
2901
- empty string. */
2902
-
2903
- if (charcount == 0)
2904
- {
2905
- ADD_ACTIVE(next_state_offset, 0);
2906
- }
2907
-
2908
- /* Optimization: if there are no more active states, and there
2909
- are no new states yet set up, then skip over the subject string
2910
- right here, to save looping. Otherwise, set up the new state to swing
2911
- into action when the end of the matched substring is reached. */
2912
-
2913
- else if (i + 1 >= active_count && new_count == 0)
2914
- {
2915
- ptr += charcount;
2916
- clen = 0;
2917
- ADD_NEW(next_state_offset, 0);
2918
-
2919
- /* If we are adding a repeat state at the new character position,
2920
- we must fudge things so that it is the only current state.
2921
- Otherwise, it might be a duplicate of one we processed before, and
2922
- that would cause it to be skipped. */
2923
-
2924
- if (repeat_state_offset >= 0)
2925
- {
2926
- next_active_state = active_states;
2927
- active_count = 0;
2928
- i = -1;
2929
- ADD_ACTIVE(repeat_state_offset, 0);
2930
- }
2931
- }
2932
- else
2933
- {
2934
- #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2935
- if (utf)
2936
- {
2937
- const pcre_uchar *p = start_subject + local_offsets[0];
2938
- const pcre_uchar *pp = start_subject + local_offsets[1];
2939
- while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2940
- }
2941
- #endif
2942
- ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2943
- if (repeat_state_offset >= 0)
2944
- { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
2945
- }
2946
- }
2947
- else if (rc != PCRE_ERROR_NOMATCH) return rc;
2948
- }
2949
- break;
2950
-
2951
-
2952
- /* ========================================================================== */
2953
- /* Handle callouts */
2954
-
2955
- case OP_CALLOUT:
2956
- rrc = 0;
2957
- if (PUBL(callout) != NULL)
2958
- {
2959
- PUBL(callout_block) cb;
2960
- cb.version = 1; /* Version 1 of the callout block */
2961
- cb.callout_number = code[1];
2962
- cb.offset_vector = offsets;
2963
- #if defined COMPILE_PCRE8
2964
- cb.subject = (PCRE_SPTR)start_subject;
2965
- #elif defined COMPILE_PCRE16
2966
- cb.subject = (PCRE_SPTR16)start_subject;
2967
- #elif defined COMPILE_PCRE32
2968
- cb.subject = (PCRE_SPTR32)start_subject;
2969
- #endif
2970
- cb.subject_length = (int)(end_subject - start_subject);
2971
- cb.start_match = (int)(current_subject - start_subject);
2972
- cb.current_position = (int)(ptr - start_subject);
2973
- cb.pattern_position = GET(code, 2);
2974
- cb.next_item_length = GET(code, 2 + LINK_SIZE);
2975
- cb.capture_top = 1;
2976
- cb.capture_last = -1;
2977
- cb.callout_data = md->callout_data;
2978
- cb.mark = NULL; /* No (*MARK) support */
2979
- if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */
2980
- }
2981
- if (rrc == 0)
2982
- { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
2983
- break;
2984
-
2985
-
2986
- /* ========================================================================== */
2987
- default: /* Unsupported opcode */
2988
- return PCRE_ERROR_DFA_UITEM;
2989
- }
2990
-
2991
- NEXT_ACTIVE_STATE: continue;
2992
-
2993
- } /* End of loop scanning active states */
2994
-
2995
- /* We have finished the processing at the current subject character. If no
2996
- new states have been set for the next character, we have found all the
2997
- matches that we are going to find. If we are at the top level and partial
2998
- matching has been requested, check for appropriate conditions.
2999
-
3000
- The "forced_fail" variable counts the number of (*F) encountered for the
3001
- character. If it is equal to the original active_count (saved in
3002
- workspace[1]) it means that (*F) was found on every active state. In this
3003
- case we don't want to give a partial match.
3004
-
3005
- The "could_continue" variable is true if a state could have continued but
3006
- for the fact that the end of the subject was reached. */
3007
-
3008
- if (new_count <= 0)
3009
- {
3010
- if (rlevel == 1 && /* Top level, and */
3011
- could_continue && /* Some could go on, and */
3012
- forced_fail != workspace[1] && /* Not all forced fail & */
3013
- ( /* either... */
3014
- (md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */
3015
- || /* or... */
3016
- ((md->moptions & PCRE_PARTIAL_SOFT) != 0 && /* Soft partial and */
3017
- match_count < 0) /* no matches */
3018
- ) && /* And... */
3019
- (
3020
- partial_newline || /* Either partial NL */
3021
- ( /* or ... */
3022
- ptr >= end_subject && /* End of subject and */
3023
- ptr > md->start_used_ptr) /* Inspected non-empty string */
3024
- )
3025
- )
3026
- {
3027
- if (offsetcount >= 2)
3028
- {
3029
- offsets[0] = (int)(md->start_used_ptr - start_subject);
3030
- offsets[1] = (int)(end_subject - start_subject);
3031
- }
3032
- match_count = PCRE_ERROR_PARTIAL;
3033
- }
3034
-
3035
- DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
3036
- "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
3037
- rlevel*2-2, SP));
3038
- break; /* In effect, "return", but see the comment below */
3039
- }
3040
-
3041
- /* One or more states are active for the next character. */
3042
-
3043
- ptr += clen; /* Advance to next subject character */
3044
- } /* Loop to move along the subject string */
3045
-
3046
- /* Control gets here from "break" a few lines above. We do it this way because
3047
- if we use "return" above, we have compiler trouble. Some compilers warn if
3048
- there's nothing here because they think the function doesn't return a value. On
3049
- the other hand, if we put a dummy statement here, some more clever compilers
3050
- complain that it can't be reached. Sigh. */
3051
-
3052
- return match_count;
3053
- }
3054
-
3055
-
3056
-
3057
-
3058
- /*************************************************
3059
- * Execute a Regular Expression - DFA engine *
3060
- *************************************************/
3061
-
3062
- /* This external function applies a compiled re to a subject string using a DFA
3063
- engine. This function calls the internal function multiple times if the pattern
3064
- is not anchored.
3065
-
3066
- Arguments:
3067
- argument_re points to the compiled expression
3068
- extra_data points to extra data or is NULL
3069
- subject points to the subject string
3070
- length length of subject string (may contain binary zeros)
3071
- start_offset where to start in the subject string
3072
- options option bits
3073
- offsets vector of match offsets
3074
- offsetcount size of same
3075
- workspace workspace vector
3076
- wscount size of same
3077
-
3078
- Returns: > 0 => number of match offset pairs placed in offsets
3079
- = 0 => offsets overflowed; longest matches are present
3080
- -1 => failed to match
3081
- < -1 => some kind of unexpected problem
3082
- */
3083
-
3084
- #if defined COMPILE_PCRE8
3085
- PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3086
- pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
3087
- const char *subject, int length, int start_offset, int options, int *offsets,
3088
- int offsetcount, int *workspace, int wscount)
3089
- #elif defined COMPILE_PCRE16
3090
- PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3091
- pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
3092
- PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3093
- int offsetcount, int *workspace, int wscount)
3094
- #elif defined COMPILE_PCRE32
3095
- PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3096
- pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
3097
- PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
3098
- int offsetcount, int *workspace, int wscount)
3099
- #endif
3100
- {
3101
- REAL_PCRE *re = (REAL_PCRE *)argument_re;
3102
- dfa_match_data match_block;
3103
- dfa_match_data *md = &match_block;
3104
- BOOL utf, anchored, startline, firstline;
3105
- const pcre_uchar *current_subject, *end_subject;
3106
- const pcre_study_data *study = NULL;
3107
-
3108
- const pcre_uchar *req_char_ptr;
3109
- const pcre_uint8 *start_bits = NULL;
3110
- BOOL has_first_char = FALSE;
3111
- BOOL has_req_char = FALSE;
3112
- pcre_uchar first_char = 0;
3113
- pcre_uchar first_char2 = 0;
3114
- pcre_uchar req_char = 0;
3115
- pcre_uchar req_char2 = 0;
3116
- int newline;
3117
-
3118
- /* Plausibility checks */
3119
-
3120
- if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
3121
- if (re == NULL || subject == NULL || workspace == NULL ||
3122
- (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3123
- if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3124
- if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3125
- if (length < 0) return PCRE_ERROR_BADLENGTH;
3126
- if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3127
-
3128
- /* Check that the first field in the block is the magic number. If it is not,
3129
- return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3130
- REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3131
- means that the pattern is likely compiled with different endianness. */
3132
-
3133
- if (re->magic_number != MAGIC_NUMBER)
3134
- return re->magic_number == REVERSED_MAGIC_NUMBER?
3135
- PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
3136
- if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
3137
-
3138
- /* If restarting after a partial match, do some sanity checks on the contents
3139
- of the workspace. */
3140
-
3141
- if ((options & PCRE_DFA_RESTART) != 0)
3142
- {
3143
- if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
3144
- workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
3145
- return PCRE_ERROR_DFA_BADRESTART;
3146
- }
3147
-
3148
- /* Set up study, callout, and table data */
3149
-
3150
- md->tables = re->tables;
3151
- md->callout_data = NULL;
3152
-
3153
- if (extra_data != NULL)
3154
- {
3155
- unsigned int flags = extra_data->flags;
3156
- if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
3157
- study = (const pcre_study_data *)extra_data->study_data;
3158
- if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
3159
- if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
3160
- return PCRE_ERROR_DFA_UMLIMIT;
3161
- if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
3162
- md->callout_data = extra_data->callout_data;
3163
- if ((flags & PCRE_EXTRA_TABLES) != 0)
3164
- md->tables = extra_data->tables;
3165
- }
3166
-
3167
- /* Set some local values */
3168
-
3169
- current_subject = (const pcre_uchar *)subject + start_offset;
3170
- end_subject = (const pcre_uchar *)subject + length;
3171
- req_char_ptr = current_subject - 1;
3172
-
3173
- #ifdef SUPPORT_UTF
3174
- /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
3175
- utf = (re->options & PCRE_UTF8) != 0;
3176
- #else
3177
- utf = FALSE;
3178
- #endif
3179
-
3180
- anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
3181
- (re->options & PCRE_ANCHORED) != 0;
3182
-
3183
- /* The remaining fixed data for passing around. */
3184
-
3185
- md->start_code = (const pcre_uchar *)argument_re +
3186
- re->name_table_offset + re->name_count * re->name_entry_size;
3187
- md->start_subject = (const pcre_uchar *)subject;
3188
- md->end_subject = end_subject;
3189
- md->start_offset = start_offset;
3190
- md->moptions = options;
3191
- md->poptions = re->options;
3192
-
3193
- /* If the BSR option is not set at match time, copy what was set
3194
- at compile time. */
3195
-
3196
- if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
3197
- {
3198
- if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
3199
- md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
3200
- #ifdef BSR_ANYCRLF
3201
- else md->moptions |= PCRE_BSR_ANYCRLF;
3202
- #endif
3203
- }
3204
-
3205
- /* Handle different types of newline. The three bits give eight cases. If
3206
- nothing is set at run time, whatever was used at compile time applies. */
3207
-
3208
- switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
3209
- PCRE_NEWLINE_BITS)
3210
- {
3211
- case 0: newline = NEWLINE; break; /* Compile-time default */
3212
- case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
3213
- case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
3214
- case PCRE_NEWLINE_CR+
3215
- PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
3216
- case PCRE_NEWLINE_ANY: newline = -1; break;
3217
- case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
3218
- default: return PCRE_ERROR_BADNEWLINE;
3219
- }
3220
-
3221
- if (newline == -2)
3222
- {
3223
- md->nltype = NLTYPE_ANYCRLF;
3224
- }
3225
- else if (newline < 0)
3226
- {
3227
- md->nltype = NLTYPE_ANY;
3228
- }
3229
- else
3230
- {
3231
- md->nltype = NLTYPE_FIXED;
3232
- if (newline > 255)
3233
- {
3234
- md->nllen = 2;
3235
- md->nl[0] = (newline >> 8) & 255;
3236
- md->nl[1] = newline & 255;
3237
- }
3238
- else
3239
- {
3240
- md->nllen = 1;
3241
- md->nl[0] = newline;
3242
- }
3243
- }
3244
-
3245
- /* Check a UTF string if required. If it is invalid and the offsets vector has
3245
- at least two slots, pass back the error offset and the error code in them. */
3247
-
3248
- #ifdef SUPPORT_UTF
3249
- if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
3250
- {
3251
- int erroroffset;
3252
- int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
3253
- if (errorcode != 0)
3254
- {
3255
- if (offsetcount >= 2)
3256
- {
3257
- offsets[0] = erroroffset;
3258
- offsets[1] = errorcode;
3259
- }
3260
- #if defined COMPILE_PCRE8
3261
- return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
3262
- PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
3263
- #elif defined COMPILE_PCRE16
3264
- return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ?
3265
- PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
3266
- #elif defined COMPILE_PCRE32
3267
- return PCRE_ERROR_BADUTF32;
3268
- #endif
3269
- }
3270
- #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
3271
- if (start_offset > 0 && start_offset < length &&
3272
- NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
3273
- return PCRE_ERROR_BADUTF8_OFFSET;
3274
- #endif
3275
- }
3276
- #endif
3277
-
3278
- /* If the exec call supplied NULL for tables, use the inbuilt ones. This
3279
- is a feature that makes it possible to save compiled regex and re-use them
3280
- in other programs later. */
3281
-
3282
- if (md->tables == NULL) md->tables = PRIV(default_tables);
3283
-
3284
- /* The "must be at the start of a line" flags are used in a loop when finding
3285
- where to start. */
3286
-
3287
- startline = (re->flags & PCRE_STARTLINE) != 0;
3288
- firstline = (re->options & PCRE_FIRSTLINE) != 0;
3289
-
3290
- /* Set up the first character to match, if available. The first_byte value is
3291
- never set for an anchored regular expression, but the anchoring may be forced
3292
- at run time, so we have to test for anchoring. The first char may be unset for
3293
- an unanchored pattern, of course. If there's no first char and the pattern was
3294
- studied, there may be a bitmap of possible first characters. */
3295
-
3296
- if (!anchored)
3297
- {
3298
- if ((re->flags & PCRE_FIRSTSET) != 0)
3299
- {
3300
- has_first_char = TRUE;
3301
- first_char = first_char2 = (pcre_uchar)(re->first_char);
3302
- if ((re->flags & PCRE_FCH_CASELESS) != 0)
3303
- {
3304
- first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
3305
- #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3306
- if (utf && first_char > 127)
3307
- first_char2 = UCD_OTHERCASE(first_char);
3308
- #endif
3309
- }
3310
- }
3311
- else
3312
- {
3313
- if (!startline && study != NULL &&
3314
- (study->flags & PCRE_STUDY_MAPPED) != 0)
3315
- start_bits = study->start_bits;
3316
- }
3317
- }
3318
-
3319
- /* For anchored or unanchored matches, there may be a "last known required
3320
- character" set. */
3321
-
3322
- if ((re->flags & PCRE_REQCHSET) != 0)
3323
- {
3324
- has_req_char = TRUE;
3325
- req_char = req_char2 = (pcre_uchar)(re->req_char);
3326
- if ((re->flags & PCRE_RCH_CASELESS) != 0)
3327
- {
3328
- req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
3329
- #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3330
- if (utf && req_char > 127)
3331
- req_char2 = UCD_OTHERCASE(req_char);
3332
- #endif
3333
- }
3334
- }
3335
-
3336
- /* Call the main matching function, looping for a non-anchored regex after a
3337
- failed match. If not restarting, perform certain optimizations at the start of
3338
- a match. */
3339
-
3340
- for (;;)
3341
- {
3342
- int rc;
3343
-
3344
- if ((options & PCRE_DFA_RESTART) == 0)
3345
- {
3346
- const pcre_uchar *save_end_subject = end_subject;
3347
-
3348
- /* If firstline is TRUE, the start of the match is constrained to the first
3349
- line of a multiline string. Implement this by temporarily adjusting
3350
- end_subject so that we stop scanning at a newline. If the match fails at
3351
- the newline, later code breaks this loop. */
3352
-
3353
- if (firstline)
3354
- {
3355
- PCRE_PUCHAR t = current_subject;
3356
- #ifdef SUPPORT_UTF
3357
- if (utf)
3358
- {
3359
- while (t < md->end_subject && !IS_NEWLINE(t))
3360
- {
3361
- t++;
3362
- ACROSSCHAR(t < end_subject, *t, t++);
3363
- }
3364
- }
3365
- else
3366
- #endif
3367
- while (t < md->end_subject && !IS_NEWLINE(t)) t++;
3368
- end_subject = t;
3369
- }
3370
-
3371
- /* There are some optimizations that avoid running the match if a known
3372
- starting point is not found. However, there is an option that disables
3373
- these, for testing and for ensuring that all callouts do actually occur.
3374
- The option can be set in the regex by (*NO_START_OPT) or passed in
3375
- match-time options. */
3376
-
3377
- if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
3378
- {
3379
- /* Advance to a known first char. */
3380
-
3381
- if (has_first_char)
3382
- {
3383
- if (first_char != first_char2)
3384
- {
3385
- pcre_uchar csc;
3386
- while (current_subject < end_subject &&
3387
- (csc = RAWUCHARTEST(current_subject)) != first_char && csc != first_char2)
3388
- current_subject++;
3389
- }
3390
- else
3391
- while (current_subject < end_subject &&
3392
- RAWUCHARTEST(current_subject) != first_char)
3393
- current_subject++;
3394
- }
3395
-
3396
- /* Or to just after a linebreak for a multiline match if possible */
3397
-
3398
- else if (startline)
3399
- {
3400
- if (current_subject > md->start_subject + start_offset)
3401
- {
3402
- #ifdef SUPPORT_UTF
3403
- if (utf)
3404
- {
3405
- while (current_subject < end_subject &&
3406
- !WAS_NEWLINE(current_subject))
3407
- {
3408
- current_subject++;
3409
- ACROSSCHAR(current_subject < end_subject, *current_subject,
3410
- current_subject++);
3411
- }
3412
- }
3413
- else
3414
- #endif
3415
- while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
3416
- current_subject++;
3417
-
3418
- /* If we have just passed a CR and the newline option is ANY or
3419
- ANYCRLF, and we are now at a LF, advance the match position by one
3420
- more character. */
3421
-
3422
- if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
3423
- (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
3424
- current_subject < end_subject &&
3425
- RAWUCHARTEST(current_subject) == CHAR_NL)
3426
- current_subject++;
3427
- }
3428
- }
3429
-
3430
- /* Or to a non-unique first char after study */
3431
-
3432
- else if (start_bits != NULL)
3433
- {
3434
- while (current_subject < end_subject)
3435
- {
3436
- register pcre_uint32 c = RAWUCHARTEST(current_subject);
3437
- #ifndef COMPILE_PCRE8
3438
- if (c > 255) c = 255;
3439
- #endif
3440
- if ((start_bits[c/8] & (1 << (c&7))) == 0)
3441
- {
3442
- current_subject++;
3443
- #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3444
- /* In non 8-bit mode, the iteration will stop for
3445
- characters > 255 at the beginning or not stop at all. */
3446
- if (utf)
3447
- ACROSSCHAR(current_subject < end_subject, *current_subject,
3448
- current_subject++);
3449
- #endif
3450
- }
3451
- else break;
3452
- }
3453
- }
3454
- }
3455
-
3456
- /* Restore fudged end_subject */
3457
-
3458
- end_subject = save_end_subject;
3459
-
3460
- /* The following two optimizations are disabled for partial matching or if
3461
- disabling is explicitly requested (and of course, by the test above, this
3462
- code is not obeyed when restarting after a partial match). */
3463
-
3464
- if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
3465
- (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
3466
- {
3467
- /* If the pattern was studied, a minimum subject length may be set. This
3468
- is a lower bound; no actual string of that length may actually match the
3469
- pattern. Although the value is, strictly, in characters, we treat it as
3470
- bytes to avoid spending too much time in this optimization. */
3471
-
3472
- if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
3473
- (pcre_uint32)(end_subject - current_subject) < study->minlength)
3474
- return PCRE_ERROR_NOMATCH;
3475
-
3476
- /* If req_char is set, we know that that character must appear in the
3477
- subject for the match to succeed. If the first character is set, req_char
3478
- must be later in the subject; otherwise the test starts at the match
3479
- point. This optimization can save a huge amount of work in patterns with
3480
- nested unlimited repeats that aren't going to match. Writing separate
3481
- code for cased/caseless versions makes it go faster, as does using an
3482
- autoincrement and backing off on a match.
3483
-
3484
- HOWEVER: when the subject string is very, very long, searching to its end
3485
- can take a long time, and give bad performance on quite ordinary
3486
- patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
3487
- string... so we don't do this when the string is sufficiently long. */
3488
-
3489
- if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
3490
- {
3491
- register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
3492
-
3493
- /* We don't need to repeat the search if we haven't yet reached the
3494
- place we found it at last time. */
3495
-
3496
- if (p > req_char_ptr)
3497
- {
3498
- if (req_char != req_char2)
3499
- {
3500
- while (p < end_subject)
3501
- {
3502
- register pcre_uint32 pp = RAWUCHARINCTEST(p);
3503
- if (pp == req_char || pp == req_char2) { p--; break; }
3504
- }
3505
- }
3506
- else
3507
- {
3508
- while (p < end_subject)
3509
- {
3510
- if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
3511
- }
3512
- }
3513
-
3514
- /* If we can't find the required character, break the matching loop,
3515
- which will cause a return of PCRE_ERROR_NOMATCH. */
3516
-
3517
- if (p >= end_subject) break;
3518
-
3519
- /* If we have found the required character, save the point where we
3520
- found it, so that we don't search again next time round the loop if
3521
- the start hasn't passed this character yet. */
3522
-
3523
- req_char_ptr = p;
3524
- }
3525
- }
3526
- }
3527
- } /* End of optimizations that are done when not restarting */
3528
-
3529
- /* OK, now we can do the business */
3530
-
3531
- md->start_used_ptr = current_subject;
3532
- md->recursive = NULL;
3533
-
3534
- rc = internal_dfa_exec(
3535
- md, /* fixed match data */
3536
- md->start_code, /* this subexpression's code */
3537
- current_subject, /* where we currently are */
3538
- start_offset, /* start offset in subject */
3539
- offsets, /* offset vector */
3540
- offsetcount, /* size of same */
3541
- workspace, /* workspace vector */
3542
- wscount, /* size of same */
3543
- 0); /* function recurse level */
3544
-
3545
- /* Anything other than "no match" means we are done, always; otherwise, carry
3546
- on only if not anchored. */
3547
-
3548
- if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;
3549
-
3550
- /* Advance to the next subject character unless we are at the end of a line
3551
- and firstline is set. */
3552
-
3553
- if (firstline && IS_NEWLINE(current_subject)) break;
3554
- current_subject++;
3555
- #ifdef SUPPORT_UTF
3556
- if (utf)
3557
- {
3558
- ACROSSCHAR(current_subject < end_subject, *current_subject,
3559
- current_subject++);
3560
- }
3561
- #endif
3562
- if (current_subject > end_subject) break;
3563
-
3564
- /* If we have just passed a CR and we are now at a LF, and the pattern does
3565
- not contain any explicit matches for \r or \n, and the newline option is CRLF
3566
- or ANY or ANYCRLF, advance the match position by one more character. */
3567
-
3568
- if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
3569
- current_subject < end_subject &&
3570
- RAWUCHARTEST(current_subject) == CHAR_NL &&
3571
- (re->flags & PCRE_HASCRORLF) == 0 &&
3572
- (md->nltype == NLTYPE_ANY ||
3573
- md->nltype == NLTYPE_ANYCRLF ||
3574
- md->nllen == 2))
3575
- current_subject++;
3576
-
3577
- } /* "Bumpalong" loop */
3578
-
3579
- return PCRE_ERROR_NOMATCH;
3580
- }
3581
-
3582
- /* End of pcre_dfa_exec.c */