kuromoji-ko 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +76 -3
- package/dist/index.cjs +227 -1
- package/dist/index.d.cts +158 -1
- package/dist/index.d.ts +158 -1
- package/dist/index.js +224 -1
- package/package.json +4 -4
package/README.md
CHANGED
@@ -20,6 +20,24 @@ npm install kuromoji-ko
 
 ## Quick Start
 
+### napi-mecab Compatible API (Recommended)
+
+```javascript
+import { MeCab } from 'kuromoji-ko';
+
+const mecab = await MeCab.create({ engine: 'ko', dictPath: './dict' });
+const tokens = mecab.parse('안녕하세요');
+
+for (const token of tokens) {
+  console.log(token.surface, token.pos, token.lemma);
+}
+// 안녕 ['NNG'] 안녕
+// 하 ['XSV'] 하다
+// 세요 ['EF'] 세요
+```
+
+### Classic API
+
 ```javascript
 import kuromoji from 'kuromoji-ko';
 
@@ -53,7 +71,62 @@ This creates binary dictionary files in the `./dict` directory.
 
 ## API
 
-###
+### MeCab API (napi-mecab compatible)
+
+#### `MeCab.create(options)`
+
+Create a MeCab instance asynchronously.
+
+```javascript
+import { MeCab } from 'kuromoji-ko';
+
+const mecab = await MeCab.create({
+  engine: 'ko', // Only 'ko' is supported
+  dictPath: './dict' // Path to dictionary directory
+});
+```
+
+#### `mecab.parse(text)`
+
+Parse text into an array of Token objects.
+
+```javascript
+const tokens = mecab.parse('아버지가방에들어가신다');
+tokens.forEach(t => console.log(t.surface, t.pos));
+```
+
+### Token Object (napi-mecab compatible)
+
+| Property | Type | Description |
+|----------|------|-------------|
+| `surface` | `string` | How the token looks in the input text |
+| `pos` | `string[]` | Parts of speech as array (split by "+") |
+| `lemma` | `string` | Dictionary headword (adds "다" for verbs) |
+| `pronunciation` | `string \| null` | How the token is pronounced |
+| `hasBatchim` | `boolean \| null` | Whether token has final consonant (받침) |
+| `hasJongseong` | `boolean \| null` | Alias for hasBatchim |
+| `semanticClass` | `string \| null` | Semantic word class or category |
+| `type` | `string \| null` | Token type (Inflect/Compound/Preanalysis) |
+| `expression` | `ExpressionToken[] \| null` | Breakdown of compound/inflected tokens |
+| `features` | `string` | Raw features string (comma-separated) |
+| `raw` | `string` | Raw MeCab output format (surface\tfeatures) |
+
+### ExpressionToken Object
+
+For compound or inflected words, `expression` returns an array of ExpressionToken:
+
+| Property | Type | Description |
+|----------|------|-------------|
+| `morpheme` | `string` | The normalized token |
+| `pos` | `string` | Part of speech |
+| `lemma` | `string` | Dictionary form (adds "다" for verbs) |
+| `semanticClass` | `string \| null` | Semantic category |
+
+---
+
+### Classic API
+
+#### `kuromoji.builder(options)`
 
 Create a tokenizer builder.
 
@@ -98,9 +171,9 @@ const str = tokenizer.wakatiString('한국어 형태소 분석');
 // '한국어 형태소 분석'
 ```
 
-##
+## KoreanToken Object (Classic API)
 
-Each token has the following properties:
+Each token from `tokenizer.tokenize()` has the following properties:
 
 | Property | Description | Example |
 |----------|-------------|---------|
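The tables above pin down the napi-mecab compatible Token shape; the sketch below ties a few of those fields together. It is illustrative only: it assumes the binary dictionary has already been built into `./dict` as described in the README, and the analyses hinted at in the comments depend on mecab-ko-dic, so treat them as approximate.

```javascript
import { MeCab } from 'kuromoji-ko';

const mecab = await MeCab.create({ engine: 'ko', dictPath: './dict' });

for (const token of mecab.parse('하늘을 나는 자동차')) {
  // pos is an array because compound tags are split on "+"
  console.log(token.surface, token.pos.join('+'), token.lemma);

  // hasBatchim reports whether the surface ends in a final consonant (받침),
  // which is what you need to pick particles such as 이/가 or 은/는
  if (token.hasBatchim !== null) {
    console.log('  particle hint:', token.hasBatchim ? '이 / 은' : '가 / 는');
  }

  // Inflected/compound tokens expose their breakdown through expression
  if (token.expression) {
    for (const part of token.expression) {
      console.log('  ', part.morpheme, part.pos, '->', part.lemma);
    }
  }
}
```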
package/dist/index.cjs
CHANGED
@@ -30,8 +30,11 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
 // src/index.ts
 var index_exports = {};
 __export(index_exports, {
+  ExpressionToken: () => ExpressionToken,
   KoreanToken: () => KoreanToken,
+  MeCab: () => MeCab,
   POS_TAGS: () => POS_TAGS,
+  Token: () => Token,
   Tokenizer: () => Tokenizer,
   TokenizerBuilder: () => TokenizerBuilder,
   builder: () => builder,
@@ -1401,15 +1404,238 @@ var TokenizerBuilder = class {
   }
 };
 
+// src/ExpressionToken.ts
+var VERB_TAGS = ["VV", "VA", "VX", "VCP", "VCN"];
+function nullIfStar(value) {
+  return value === "*" ? null : value;
+}
+var ExpressionToken = class {
+  constructor(raw) {
+    const parts = raw.split("/");
+    this._morpheme = parts[0] ?? "";
+    this._pos = parts[1] ?? "";
+    this._semanticClass = parts[2] ?? "*";
+  }
+  /**
+   * The normalized token/morpheme
+   */
+  get morpheme() {
+    return this._morpheme;
+  }
+  /**
+   * The part of speech tag
+   */
+  get pos() {
+    return this._pos;
+  }
+  /**
+   * The dictionary form (adds 다 for verbs)
+   */
+  get lemma() {
+    if (VERB_TAGS.includes(this._pos)) {
+      return this._morpheme + "\uB2E4";
+    }
+    return this._morpheme;
+  }
+  /**
+   * The semantic word class or category
+   */
+  get semanticClass() {
+    return nullIfStar(this._semanticClass);
+  }
+};
+
+// src/Token.ts
+var VERB_TAGS2 = ["VV", "VA", "VX", "VCP", "VCN"];
+function nullIfStar2(value) {
+  return value === "*" ? null : value;
+}
+var Token = class {
+  constructor(token) {
+    this._token = token;
+  }
+  /**
+   * How the token looks in the input text
+   */
+  get surface() {
+    return this._token.surface_form;
+  }
+  /**
+   * The raw features string (comma-separated)
+   */
+  get features() {
+    return [
+      this._token.pos,
+      this._token.semantic_class,
+      this._token.has_final_consonant,
+      this._token.reading,
+      this._token.type,
+      this._token.first_pos,
+      this._token.last_pos,
+      this._token.expression
+    ].join(",");
+  }
+  /**
+   * The raw string in MeCab format (surface\tfeatures)
+   */
+  get raw() {
+    return `${this.surface} ${this.features}`;
+  }
+  /**
+   * Parts of speech as an array (split by "+")
+   */
+  get pos() {
+    return this._token.pos.split("+");
+  }
+  /**
+   * The dictionary headword (adds 다 for verbs)
+   */
+  get lemma() {
+    const basePos = this.pos[0];
+    if (VERB_TAGS2.includes(basePos)) {
+      return this.surface + "\uB2E4";
+    }
+    return this.surface;
+  }
+  /**
+   * How the token is pronounced
+   */
+  get pronunciation() {
+    return nullIfStar2(this._token.reading);
+  }
+  /**
+   * Whether the token has a final consonant (받침/batchim)
+   */
+  get hasBatchim() {
+    const val = this._token.has_final_consonant;
+    if (val === "T") return true;
+    if (val === "F") return false;
+    return null;
+  }
+  /**
+   * Alias for hasBatchim (종성/jongseong)
+   */
+  get hasJongseong() {
+    return this.hasBatchim;
+  }
+  /**
+   * The semantic word class or category
+   */
+  get semanticClass() {
+    return nullIfStar2(this._token.semantic_class);
+  }
+  /**
+   * The type of token (Inflect/Compound/Preanalysis)
+   */
+  get type() {
+    return nullIfStar2(this._token.type);
+  }
+  /**
+   * The broken-down expression tokens for compound/inflected words
+   */
+  get expression() {
+    if (this._token.expression === "*") return null;
+    return this._token.expression.split("+").map((part) => new ExpressionToken(part));
+  }
+  /**
+   * Get the underlying KoreanToken
+   */
+  get koreanToken() {
+    return this._token;
+  }
+};
+
+// src/MeCab.ts
+var MeCab = class _MeCab {
+  constructor(tokenizer) {
+    this.tokenizer = tokenizer;
+  }
+  /**
+   * Create a MeCab instance asynchronously.
+   *
+   * Unlike napi-mecab which uses a synchronous constructor,
+   * this pure JavaScript implementation requires async initialization
+   * to load the dictionary files without blocking.
+   *
+   * @example
+   * ```typescript
+   * const mecab = await MeCab.create({ engine: 'ko' });
+   * const tokens = mecab.parse('안녕하세요');
+   * ```
+   */
+  static async create(opts = {}) {
+    const engine = opts.engine ?? "ko";
+    if (engine !== "ko") {
+      throw new Error(
+        `"${engine}" is not a supported mecab engine. Only "ko" (Korean) is supported.`
+      );
+    }
+    const builder2 = new TokenizerBuilder({
+      dicPath: opts.dictPath
+    });
+    const tokenizer = await builder2.build();
+    return new _MeCab(tokenizer);
+  }
+  /**
+   * Parse text into an array of tokens.
+   *
+   * @param text - The text to parse
+   * @returns Array of Token objects
+   *
+   * @example
+   * ```typescript
+   * const tokens = mecab.parse('아버지가방에들어가신다');
+   * tokens.forEach(t => console.log(t.surface, t.pos));
+   * ```
+   */
+  parse(text) {
+    const koreanTokens = this.tokenizer.tokenize(text);
+    return koreanTokens.map((token) => new Token(token));
+  }
+  /**
+   * Get just the surface forms as an array.
+   * Convenience method equivalent to napi-mecab parse + map surface.
+   */
+  wakati(text) {
+    return this.tokenizer.wakati(text);
+  }
+  /**
+   * Get space-separated surface forms.
+   */
+  wakatiString(text) {
+    return this.tokenizer.wakatiString(text);
+  }
+  /**
+   * Access the underlying Tokenizer for advanced usage.
+   */
+  get underlyingTokenizer() {
+    return this.tokenizer;
+  }
+};
+
 // src/index.ts
 function builder(options = {}) {
   return new TokenizerBuilder(options);
 }
-var index_default = {
+var index_default = {
+  // Original API
+  builder,
+  TokenizerBuilder,
+  Tokenizer,
+  KoreanToken,
+  POS_TAGS,
+  // napi-mecab compatible API
+  MeCab,
+  Token,
+  ExpressionToken
+};
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
+  ExpressionToken,
   KoreanToken,
+  MeCab,
   POS_TAGS,
+  Token,
   Tokenizer,
   TokenizerBuilder,
   builder
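The classes added to the CommonJS bundle above are thin wrappers over a mecab-ko-dic feature row: `Token#pos` splits the tag on `+`, `Token#expression` splits the compound breakdown on `+` and each part on `/`, and verb-like tags get `다` appended to form the lemma. The snippet below is a standalone sketch of that parsing step, not the shipped code, and the sample `expression` string is hypothetical.

```javascript
// Mirrors the bundled ExpressionToken logic: "morpheme/pos/semanticClass"
// parts joined by "+". The input string here is made up for illustration.
const VERB_TAGS = ['VV', 'VA', 'VX', 'VCP', 'VCN'];
const expression = '들어가/VV/*+시/EP/*+ㄴ다/EC/*';

const parts = expression.split('+').map((raw) => {
  const [morpheme = '', pos = '', semanticClass = '*'] = raw.split('/');
  return {
    morpheme,
    pos,
    // verb-like tags get 다 appended to form the dictionary lemma
    lemma: VERB_TAGS.includes(pos) ? morpheme + '다' : morpheme,
    // "*" is the dictionary's "no value" marker, surfaced as null
    semanticClass: semanticClass === '*' ? null : semanticClass,
  };
});

console.log(parts);
// e.g. [ { morpheme: '들어가', pos: 'VV', lemma: '들어가다', semanticClass: null }, ... ]
```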
package/dist/index.d.cts
CHANGED
@@ -330,6 +330,160 @@ declare class TokenizerBuilder {
   build(): Promise<Tokenizer>;
 }
 
+/**
+ * ExpressionToken - represents a component of an agglutinated Korean token
+ *
+ * Korean compound/inflected words have an expression field in the format:
+ * "morpheme/pos/semanticClass+morpheme/pos/semanticClass+..."
+ *
+ * This class represents a single component of that expression.
+ */
+declare class ExpressionToken {
+  private _morpheme;
+  private _pos;
+  private _semanticClass;
+  constructor(raw: string);
+  /**
+   * The normalized token/morpheme
+   */
+  get morpheme(): string;
+  /**
+   * The part of speech tag
+   */
+  get pos(): string;
+  /**
+   * The dictionary form (adds 다 for verbs)
+   */
+  get lemma(): string;
+  /**
+   * The semantic word class or category
+   */
+  get semanticClass(): string | null;
+}
+
+/**
+ * Token - napi-mecab compatible token wrapper
+ *
+ * Provides getters that match the napi-mecab API for Korean tokens.
+ */
+
+declare class Token {
+  private _token;
+  constructor(token: KoreanToken);
+  /**
+   * How the token looks in the input text
+   */
+  get surface(): string;
+  /**
+   * The raw features string (comma-separated)
+   */
+  get features(): string;
+  /**
+   * The raw string in MeCab format (surface\tfeatures)
+   */
+  get raw(): string;
+  /**
+   * Parts of speech as an array (split by "+")
+   */
+  get pos(): string[];
+  /**
+   * The dictionary headword (adds 다 for verbs)
+   */
+  get lemma(): string | null;
+  /**
+   * How the token is pronounced
+   */
+  get pronunciation(): string | null;
+  /**
+   * Whether the token has a final consonant (받침/batchim)
+   */
+  get hasBatchim(): boolean | null;
+  /**
+   * Alias for hasBatchim (종성/jongseong)
+   */
+  get hasJongseong(): boolean | null;
+  /**
+   * The semantic word class or category
+   */
+  get semanticClass(): string | null;
+  /**
+   * The type of token (Inflect/Compound/Preanalysis)
+   */
+  get type(): string | null;
+  /**
+   * The broken-down expression tokens for compound/inflected words
+   */
+  get expression(): ExpressionToken[] | null;
+  /**
+   * Get the underlying KoreanToken
+   */
+  get koreanToken(): KoreanToken;
+}
+
+/**
+ * MeCab - napi-mecab compatible API wrapper
+ *
+ * Provides a familiar API for users coming from napi-mecab.
+ * Uses async initialization since this is a pure JavaScript implementation.
+ */
+
+interface MeCabOptions {
+  /**
+   * The language engine to use. Only 'ko' (Korean) is supported.
+   * @default 'ko'
+   */
+  engine?: 'ko';
+  /**
+   * Path to the dictionary directory.
+   * @default 'dict/'
+   */
+  dictPath?: string;
+}
+declare class MeCab {
+  private tokenizer;
+  private constructor();
+  /**
+   * Create a MeCab instance asynchronously.
+   *
+   * Unlike napi-mecab which uses a synchronous constructor,
+   * this pure JavaScript implementation requires async initialization
+   * to load the dictionary files without blocking.
+   *
+   * @example
+   * ```typescript
+   * const mecab = await MeCab.create({ engine: 'ko' });
+   * const tokens = mecab.parse('안녕하세요');
+   * ```
+   */
+  static create(opts?: MeCabOptions): Promise<MeCab>;
+  /**
+   * Parse text into an array of tokens.
+   *
+   * @param text - The text to parse
+   * @returns Array of Token objects
+   *
+   * @example
+   * ```typescript
+   * const tokens = mecab.parse('아버지가방에들어가신다');
+   * tokens.forEach(t => console.log(t.surface, t.pos));
+   * ```
+   */
+  parse(text: string): Token[];
+  /**
+   * Get just the surface forms as an array.
+   * Convenience method equivalent to napi-mecab parse + map surface.
+   */
+  wakati(text: string): string[];
+  /**
+   * Get space-separated surface forms.
+   */
+  wakatiString(text: string): string;
+  /**
+   * Access the underlying Tokenizer for advanced usage.
+   */
+  get underlyingTokenizer(): Tokenizer;
+}
+
 /**
  * mecab-ko - Pure TypeScript Korean Morphological Analyzer
  *
@@ -347,6 +501,9 @@ declare const _default: {
   Tokenizer: typeof Tokenizer;
   KoreanToken: typeof KoreanToken;
   POS_TAGS: Record<string, string>;
+  MeCab: typeof MeCab;
+  Token: typeof Token;
+  ExpressionToken: typeof ExpressionToken;
 };
 
-export { KoreanToken, POS_TAGS, Tokenizer, TokenizerBuilder, type TokenizerBuilderOptions, builder, _default as default };
+export { ExpressionToken, KoreanToken, MeCab, type MeCabOptions, POS_TAGS, Token, Tokenizer, TokenizerBuilder, type TokenizerBuilderOptions, builder, _default as default };
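A small consumer-side sketch against the declarations above. The defaults (`engine: 'ko'`, `dictPath: 'dict/'`) come from the `MeCabOptions` JSDoc, the failing call only illustrates the documented "ko"-only restriction, and the example assumes a dictionary has been generated at the default path.

```javascript
import { MeCab } from 'kuromoji-ko';

// With no options the documented defaults apply: engine 'ko', dictPath 'dict/'.
const mecab = await MeCab.create();
console.log(mecab.wakatiString('한국어 형태소 분석'));

// Any engine other than 'ko' is rejected when the instance is created.
try {
  await MeCab.create({ engine: 'ja' });
} catch (err) {
  console.error(err.message); // "ja" is not a supported mecab engine...
}
```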
package/dist/index.d.ts
CHANGED
@@ -330,6 +330,160 @@ declare class TokenizerBuilder {
   build(): Promise<Tokenizer>;
 }
 
+/**
+ * ExpressionToken - represents a component of an agglutinated Korean token
+ *
+ * Korean compound/inflected words have an expression field in the format:
+ * "morpheme/pos/semanticClass+morpheme/pos/semanticClass+..."
+ *
+ * This class represents a single component of that expression.
+ */
+declare class ExpressionToken {
+  private _morpheme;
+  private _pos;
+  private _semanticClass;
+  constructor(raw: string);
+  /**
+   * The normalized token/morpheme
+   */
+  get morpheme(): string;
+  /**
+   * The part of speech tag
+   */
+  get pos(): string;
+  /**
+   * The dictionary form (adds 다 for verbs)
+   */
+  get lemma(): string;
+  /**
+   * The semantic word class or category
+   */
+  get semanticClass(): string | null;
+}
+
+/**
+ * Token - napi-mecab compatible token wrapper
+ *
+ * Provides getters that match the napi-mecab API for Korean tokens.
+ */
+
+declare class Token {
+  private _token;
+  constructor(token: KoreanToken);
+  /**
+   * How the token looks in the input text
+   */
+  get surface(): string;
+  /**
+   * The raw features string (comma-separated)
+   */
+  get features(): string;
+  /**
+   * The raw string in MeCab format (surface\tfeatures)
+   */
+  get raw(): string;
+  /**
+   * Parts of speech as an array (split by "+")
+   */
+  get pos(): string[];
+  /**
+   * The dictionary headword (adds 다 for verbs)
+   */
+  get lemma(): string | null;
+  /**
+   * How the token is pronounced
+   */
+  get pronunciation(): string | null;
+  /**
+   * Whether the token has a final consonant (받침/batchim)
+   */
+  get hasBatchim(): boolean | null;
+  /**
+   * Alias for hasBatchim (종성/jongseong)
+   */
+  get hasJongseong(): boolean | null;
+  /**
+   * The semantic word class or category
+   */
+  get semanticClass(): string | null;
+  /**
+   * The type of token (Inflect/Compound/Preanalysis)
+   */
+  get type(): string | null;
+  /**
+   * The broken-down expression tokens for compound/inflected words
+   */
+  get expression(): ExpressionToken[] | null;
+  /**
+   * Get the underlying KoreanToken
+   */
+  get koreanToken(): KoreanToken;
+}
+
+/**
+ * MeCab - napi-mecab compatible API wrapper
+ *
+ * Provides a familiar API for users coming from napi-mecab.
+ * Uses async initialization since this is a pure JavaScript implementation.
+ */
+
+interface MeCabOptions {
+  /**
+   * The language engine to use. Only 'ko' (Korean) is supported.
+   * @default 'ko'
+   */
+  engine?: 'ko';
+  /**
+   * Path to the dictionary directory.
+   * @default 'dict/'
+   */
+  dictPath?: string;
+}
+declare class MeCab {
+  private tokenizer;
+  private constructor();
+  /**
+   * Create a MeCab instance asynchronously.
+   *
+   * Unlike napi-mecab which uses a synchronous constructor,
+   * this pure JavaScript implementation requires async initialization
+   * to load the dictionary files without blocking.
+   *
+   * @example
+   * ```typescript
+   * const mecab = await MeCab.create({ engine: 'ko' });
+   * const tokens = mecab.parse('안녕하세요');
+   * ```
+   */
+  static create(opts?: MeCabOptions): Promise<MeCab>;
+  /**
+   * Parse text into an array of tokens.
+   *
+   * @param text - The text to parse
+   * @returns Array of Token objects
+   *
+   * @example
+   * ```typescript
+   * const tokens = mecab.parse('아버지가방에들어가신다');
+   * tokens.forEach(t => console.log(t.surface, t.pos));
+   * ```
+   */
+  parse(text: string): Token[];
+  /**
+   * Get just the surface forms as an array.
+   * Convenience method equivalent to napi-mecab parse + map surface.
+   */
+  wakati(text: string): string[];
+  /**
+   * Get space-separated surface forms.
+   */
+  wakatiString(text: string): string;
+  /**
+   * Access the underlying Tokenizer for advanced usage.
+   */
+  get underlyingTokenizer(): Tokenizer;
+}
+
 /**
  * mecab-ko - Pure TypeScript Korean Morphological Analyzer
  *
@@ -347,6 +501,9 @@ declare const _default: {
   Tokenizer: typeof Tokenizer;
   KoreanToken: typeof KoreanToken;
   POS_TAGS: Record<string, string>;
+  MeCab: typeof MeCab;
+  Token: typeof Token;
+  ExpressionToken: typeof ExpressionToken;
 };
 
-export { KoreanToken, POS_TAGS, Tokenizer, TokenizerBuilder, type TokenizerBuilderOptions, builder, _default as default };
+export { ExpressionToken, KoreanToken, MeCab, type MeCabOptions, POS_TAGS, Token, Tokenizer, TokenizerBuilder, type TokenizerBuilderOptions, builder, _default as default };
package/dist/index.js
CHANGED
@@ -1360,14 +1360,237 @@ var TokenizerBuilder = class {
   }
 };
 
+// src/ExpressionToken.ts
+var VERB_TAGS = ["VV", "VA", "VX", "VCP", "VCN"];
+function nullIfStar(value) {
+  return value === "*" ? null : value;
+}
+var ExpressionToken = class {
+  constructor(raw) {
+    const parts = raw.split("/");
+    this._morpheme = parts[0] ?? "";
+    this._pos = parts[1] ?? "";
+    this._semanticClass = parts[2] ?? "*";
+  }
+  /**
+   * The normalized token/morpheme
+   */
+  get morpheme() {
+    return this._morpheme;
+  }
+  /**
+   * The part of speech tag
+   */
+  get pos() {
+    return this._pos;
+  }
+  /**
+   * The dictionary form (adds 다 for verbs)
+   */
+  get lemma() {
+    if (VERB_TAGS.includes(this._pos)) {
+      return this._morpheme + "\uB2E4";
+    }
+    return this._morpheme;
+  }
+  /**
+   * The semantic word class or category
+   */
+  get semanticClass() {
+    return nullIfStar(this._semanticClass);
+  }
+};
+
+// src/Token.ts
+var VERB_TAGS2 = ["VV", "VA", "VX", "VCP", "VCN"];
+function nullIfStar2(value) {
+  return value === "*" ? null : value;
+}
+var Token = class {
+  constructor(token) {
+    this._token = token;
+  }
+  /**
+   * How the token looks in the input text
+   */
+  get surface() {
+    return this._token.surface_form;
+  }
+  /**
+   * The raw features string (comma-separated)
+   */
+  get features() {
+    return [
+      this._token.pos,
+      this._token.semantic_class,
+      this._token.has_final_consonant,
+      this._token.reading,
+      this._token.type,
+      this._token.first_pos,
+      this._token.last_pos,
+      this._token.expression
+    ].join(",");
+  }
+  /**
+   * The raw string in MeCab format (surface\tfeatures)
+   */
+  get raw() {
+    return `${this.surface} ${this.features}`;
+  }
+  /**
+   * Parts of speech as an array (split by "+")
+   */
+  get pos() {
+    return this._token.pos.split("+");
+  }
+  /**
+   * The dictionary headword (adds 다 for verbs)
+   */
+  get lemma() {
+    const basePos = this.pos[0];
+    if (VERB_TAGS2.includes(basePos)) {
+      return this.surface + "\uB2E4";
+    }
+    return this.surface;
+  }
+  /**
+   * How the token is pronounced
+   */
+  get pronunciation() {
+    return nullIfStar2(this._token.reading);
+  }
+  /**
+   * Whether the token has a final consonant (받침/batchim)
+   */
+  get hasBatchim() {
+    const val = this._token.has_final_consonant;
+    if (val === "T") return true;
+    if (val === "F") return false;
+    return null;
+  }
+  /**
+   * Alias for hasBatchim (종성/jongseong)
+   */
+  get hasJongseong() {
+    return this.hasBatchim;
+  }
+  /**
+   * The semantic word class or category
+   */
+  get semanticClass() {
+    return nullIfStar2(this._token.semantic_class);
+  }
+  /**
+   * The type of token (Inflect/Compound/Preanalysis)
+   */
+  get type() {
+    return nullIfStar2(this._token.type);
+  }
+  /**
+   * The broken-down expression tokens for compound/inflected words
+   */
+  get expression() {
+    if (this._token.expression === "*") return null;
+    return this._token.expression.split("+").map((part) => new ExpressionToken(part));
+  }
+  /**
+   * Get the underlying KoreanToken
+   */
+  get koreanToken() {
+    return this._token;
+  }
+};
+
+// src/MeCab.ts
+var MeCab = class _MeCab {
+  constructor(tokenizer) {
+    this.tokenizer = tokenizer;
+  }
+  /**
+   * Create a MeCab instance asynchronously.
+   *
+   * Unlike napi-mecab which uses a synchronous constructor,
+   * this pure JavaScript implementation requires async initialization
+   * to load the dictionary files without blocking.
+   *
+   * @example
+   * ```typescript
+   * const mecab = await MeCab.create({ engine: 'ko' });
+   * const tokens = mecab.parse('안녕하세요');
+   * ```
+   */
+  static async create(opts = {}) {
+    const engine = opts.engine ?? "ko";
+    if (engine !== "ko") {
+      throw new Error(
+        `"${engine}" is not a supported mecab engine. Only "ko" (Korean) is supported.`
+      );
+    }
+    const builder2 = new TokenizerBuilder({
+      dicPath: opts.dictPath
+    });
+    const tokenizer = await builder2.build();
+    return new _MeCab(tokenizer);
+  }
+  /**
+   * Parse text into an array of tokens.
+   *
+   * @param text - The text to parse
+   * @returns Array of Token objects
+   *
+   * @example
+   * ```typescript
+   * const tokens = mecab.parse('아버지가방에들어가신다');
+   * tokens.forEach(t => console.log(t.surface, t.pos));
+   * ```
+   */
+  parse(text) {
+    const koreanTokens = this.tokenizer.tokenize(text);
+    return koreanTokens.map((token) => new Token(token));
+  }
+  /**
+   * Get just the surface forms as an array.
+   * Convenience method equivalent to napi-mecab parse + map surface.
+   */
+  wakati(text) {
+    return this.tokenizer.wakati(text);
+  }
+  /**
+   * Get space-separated surface forms.
+   */
+  wakatiString(text) {
+    return this.tokenizer.wakatiString(text);
+  }
+  /**
+   * Access the underlying Tokenizer for advanced usage.
+   */
+  get underlyingTokenizer() {
+    return this.tokenizer;
+  }
+};
+
 // src/index.ts
 function builder(options = {}) {
   return new TokenizerBuilder(options);
 }
-var index_default = {
+var index_default = {
+  // Original API
+  builder,
+  TokenizerBuilder,
+  Tokenizer,
+  KoreanToken,
+  POS_TAGS,
+  // napi-mecab compatible API
+  MeCab,
+  Token,
+  ExpressionToken
+};
 export {
+  ExpressionToken,
   KoreanToken,
+  MeCab,
   POS_TAGS,
+  Token,
   Tokenizer,
   TokenizerBuilder,
   builder,
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "kuromoji-ko",
-  "version": "1.0.
+  "version": "1.0.2",
   "description": "Pure TypeScript Korean Morphological Analyzer - serverless compatible, based on kuromoji.js and mecab-ko-dic",
   "main": "dist/index.js",
   "module": "dist/index.mjs",
@@ -42,12 +42,12 @@
   "license": "Apache-2.0",
   "repository": {
     "type": "git",
-    "url": "git+https://github.com/
+    "url": "git+https://github.com/elfsmelf/kuromoji-ko.git"
   },
   "bugs": {
-    "url": "https://github.com/
+    "url": "https://github.com/elfsmelf/kuromoji-ko/issues"
   },
-  "homepage": "https://github.com/
+  "homepage": "https://github.com/elfsmelf/kuromoji-ko#readme",
   "devDependencies": {
     "@types/node": "^20.10.0",
     "@types/pako": "^2.0.4",