RubyGems - ots - Versions diffs - 0.4.3 → 0.4.4 - Mend

ots 0.4.3 → 0.4.4

Files changed (64) hide show

@@ -0,0 +1,128 @@
+/*
+ *  highlighter
+ *
+ *  Copyright (C) 2003 Nadav Rotem <nadav256@hotmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Library General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "libots.h"
+/* After the grader has scored every sentence of the article,
+ the highlighter selects some of the sentences, best score
+ first. */
+/*
+ * Select the best sentence that has not been selected yet.
+ *
+ * Walks Doc->lines once, remembers the first unselected sentence that
+ * carries the highest score (scores below 0 are never picked, because
+ * the running maximum starts at 0), marks it as selected and returns
+ * its word count.
+ *
+ * Returns 0 when no sentence could be selected. A selected sentence
+ * whose wc is 0 also yields 0, so callers cannot use the return value
+ * alone to detect "nothing left".
+ */
+static int
+ots_highlight_max_line (OtsArticle * Doc)
+{
+  GList *li;
+  OtsSentence *best = NULL;
+  glong max = 0;		/* same type as OtsSentence.score; an int would truncate large scores */
+
+  for (li = (GList *) Doc->lines; li != NULL; li = li->next)
+    {
+      OtsSentence *line = (OtsSentence *) li->data;
+
+      if (line->selected)	/* already part of the summary */
+	continue;
+      /* the first candidate only has to reach the current maximum;
+         later ones must beat it, so ties keep the earlier sentence */
+      if ((NULL == best) ? (line->score >= max) : (line->score > max))
+	{
+	  best = line;
+	  max = line->score;
+	}
+    }
+  if (NULL == best)
+    return 0;
+  best->selected = 1;
+  return best->wc;
+}
+/* todo: implement this
+void
+ots_highlight_doc_wordcount (OtsArticle * Doc, int wordCount)
+void
+ots_highlight_doc_linecount (OtsArticle * Doc, int wordCount)
+void
+ots_highlight_doc_soft (OtsArticle * Doc, int percent) //blur selection by average of near sentences , will mark blocks
+*/
+/*
+ * Highlight sentences until about `percent` percent of the article's
+ * words are covered.
+ *
+ * Doc     - graded article; nothing happens when it has no lines.
+ * percent - share of the text to select, clamped to 0..100.
+ *
+ * Sentences are selected best score first through
+ * ots_highlight_max_line().
+ */
+void
+ots_highlight_doc (OtsArticle * Doc, int percent)
+{
+  int i;
+  double ratio;
+  int wordCount;
+  guint attempts;
+  if (0 == Doc->lineCount)
+    return;
+  if (percent > 100)
+    percent = 100;
+  else if (percent < 0)
+    percent = 0;
+  ratio = ((double) (percent)) / (100.0);
+  wordCount = ots_get_article_word_count (Doc);
+  /* Every call to ots_highlight_max_line() selects at most one
+     sentence, so more calls than there are sentences cannot add
+     anything. Without this bound the loop never ends once no
+     sentence is selectable any more (for example when the article
+     was already highlighted by an earlier call). */
+  attempts = g_list_length ((GList *) Doc->lines);
+  for (i = 0; (attempts > 0) && (i < (ratio * (double) wordCount)); attempts--)
+    {
+      i += ots_highlight_max_line (Doc);
+    }
+}
+/*
+ * Highlight up to `lines` sentences, best score first.
+ *
+ * Doc   - graded article; nothing happens when it has no lines.
+ * lines - number of sentences wanted; capped at Doc->lineCount.
+ */
+void
+ots_highlight_doc_lines (OtsArticle * Doc, int lines)
+{
+  int i;
+  int lineCount;
+  if (0 == Doc->lineCount) return;
+  lineCount = Doc->lineCount;
+  for (i = 0; (i < lines) && (i < lineCount); i++)
+    {
+      /* only the selection side effect matters here; the returned
+         word count is not needed */
+      (void) ots_highlight_max_line (Doc);
+    }
+}
+/*
+ * Highlight sentences, best score first, while the number of
+ * highlighted words is still <= `words` and below the article's
+ * total word count.
+ *
+ * Doc   - graded article; nothing happens when it has no lines.
+ * words - word budget. Because the test is `<=`, the selection may
+ *         run past the budget by one sentence.
+ */
+void ots_highlight_doc_words (OtsArticle * Doc, int words)
+{
+  int i;
+  int docWordCount;
+  guint attempts;
+  if (0 == Doc->lineCount) return;
+  docWordCount = ots_get_article_word_count (Doc);
+  /* Every call to ots_highlight_max_line() selects at most one
+     sentence; bounding the calls by the number of sentences keeps
+     the loop from spinning forever when nothing is selectable. */
+  attempts = g_list_length ((GList *) Doc->lines);
+  i = 0;
+  while ((attempts > 0) && (i < docWordCount) && (i <= words))
+    {
+      i += ots_highlight_max_line (Doc);
+      attempts--;
+    }
+}

data/ext/html.c ADDED

@@ -0,0 +1,131 @@
+/*
+ *  html.c
+ *
+ *  Copyright (C) 2003 Nadav Rotem <nadav256@hotmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Library General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "libots.h"
+/*
+ * Render one sentence as an HTML fragment.
+ *
+ * The fragment starts with the score inside an HTML comment, followed
+ * by the words wrapped in <FONT><span>; a selected sentence gets a
+ * yellow background. A word equal to "\n" is emitted as <br>.
+ *
+ * aLine    - sentence to render.
+ * out_size - if not NULL, receives the length of the result in bytes.
+ *
+ * Returns a newly allocated, NUL-terminated buffer that the caller
+ * releases with g_free().
+ *
+ * NOTE(review): the words are copied verbatim, without HTML escaping.
+ */
+static unsigned char *
+ots_get_line_HTML (const OtsSentence * aLine, size_t * out_size)
+{
+  GList *li;
+  GString *text;
+  text = g_string_new (NULL);
+  /* format straight into the GString: no fixed-size scratch buffer */
+  g_string_append_printf (text, "<!--(%ld)-->", aLine->score);
+  if ((aLine->selected))
+    {
+      g_string_append (text,
+		       "<FONT COLOR=\"#16569E\"><span style=\'background:yellow;\'>");
+    }
+  else
+    {
+      g_string_append (text, "<FONT COLOR=\"#16569E\"><span>");
+    }
+  for (li = (GList *) aLine->words; li != NULL; li = li->next)
+    {
+      if (0 == strcmp ((char *) li->data, "\n"))
+	g_string_append (text, "<br>");
+      else
+	g_string_append (text, (char *) li->data);
+    }
+  g_string_append (text,"</span></FONT>\n");
+  if (out_size)
+    *out_size = text->len;
+  /* drop the GString wrapper but keep its character data for the caller */
+  return (unsigned char *) g_string_free (text, FALSE);
+}
+#if 0 /* compiled out: writes the HTML of a single sentence to a stream */
+static void
+ots_print_line_HTML (FILE * stream, const OtsSentence * aLine)
+{
+  unsigned char *utf8_txt;
+  size_t len;
+  utf8_txt = ots_get_line_HTML (aLine, &len); /* allocated buffer, len bytes long */
+  fwrite (utf8_txt, 1, len, stream);
+  g_free (utf8_txt); /* the buffer belongs to the caller, so release it here */
+}
+#endif
+/*
+ * Render the whole article as an HTML document.
+ *
+ * The page consists of a fixed header, the article title inside an
+ * HTML comment, one fragment per sentence (see ots_get_line_HTML) and
+ * the closing tags.
+ *
+ * Doc     - article to render.
+ * out_len - if not NULL, receives the length of the result in bytes.
+ *
+ * Returns a newly allocated, NUL-terminated buffer that the caller
+ * releases with g_free().
+ */
+unsigned char *
+ots_get_doc_HTML (const OtsArticle * Doc, size_t * out_len)
+{
+  GList *li;
+  GString *text;
+  unsigned char *line_html;
+  size_t line_len;
+  text = g_string_new (NULL);
+  g_string_append (text,
+		   "<html>\n<head>\n<title>OTS</title>\n<meta charset=\"utf-8\">\n</head>\n<body>\n");
+  g_string_append (text, "<!-- Generated by OpenTextSummarizer -->\n");
+  g_string_append (text, "<!--");
+  /* an article may have no title; g_string_append() rejects NULL */
+  if (Doc->title != NULL)
+    g_string_append (text, Doc->title);
+  g_string_append (text, "-->\n");
+  for (li = (GList *) Doc->lines; li != NULL; li = li->next)
+    {
+      line_html = ots_get_line_HTML ((OtsSentence *) li->data, &line_len);
+      g_string_append_len (text, (const char *) line_html, (gssize) line_len);
+      g_free (line_html);
+    }
+  g_string_append (text, "</body></html>\n");
+  if (out_len)
+    *out_len = text->len;
+  /* drop the GString wrapper but keep its character data for the caller */
+  return (unsigned char *) g_string_free (text, FALSE);
+}
+/*
+ * Write the HTML rendering of the article to `stream`.
+ * The temporary buffer produced by ots_get_doc_HTML() is freed
+ * before returning.
+ */
+void
+ots_print_HTML (FILE * stream, const OtsArticle * Doc)
+{
+  size_t html_len;
+  unsigned char *html = ots_get_doc_HTML (Doc, &html_len);
+
+  fwrite (html, 1, html_len, stream);
+  g_free (html);
+}

data/ext/libots.h ADDED

@@ -0,0 +1,158 @@
+/*
+ *  libots.h
+ *
+ *  Copyright (C) 2003 Nadav Rotem <nadav256@hotmail.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Library General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#ifndef HAVE_LIBOTS_H
+#define HAVE_LIBOTS_H
+#include <glib.h>
+G_BEGIN_DECLS
+typedef struct
+{ 			/* the Term Frequency data structure: one word and its weight */
+  char* word; /* the term */
+  double tf; /* term frequency. NOTE(review): the old comment read "Also used for TF"; presumably IDF was meant (ots_new_OtsWordTF takes an idf argument) - confirm */
+} OtsWordTF;
+typedef struct
+{
+	/* stemming and parsing rules; every member is a GList of char*  */
+  GList *RemovePre;   /* rule format (a|b): replace string a with b */
+  GList *RemovePost;
+  GList *step1_pre;
+  GList *step1_post;
+  GList *synonyms;
+  GList *manual;
+  GList *ParserBreak; /* presumably tokens that end a sentence - TODO confirm against the parser */
+  GList *ParserDontBreak; /* presumably tokens that must not end a sentence - TODO confirm */
+	/* declared, but support is still to be implemented */
+  GList *ReplaceChars;
+} OtsStemRule;
+typedef struct
+{
+  GList *words;			 /* a GList of the sentence's words (char*) */
+  glong score;				 /* score set by the grader */
+  gboolean selected;     /* TRUE once the highlighter has picked this sentence */
+  gint wc;               /* word count of the sentence */
+  void *user_data;       /* pointer to the original sentence, or maybe a serial number */
+} OtsSentence;
+typedef struct
+{
+  GList *lines;			   /* a GList of sentences (OtsSentence) */
+  gint lineCount;          /* number of lines in the text */
+  char *title;              /* title, auto generated; may be NULL (the Ruby binding checks for that) */
+  OtsStemRule *stem;       /* stemming & parsing rules */
+ /* Term Frequency grader */
+  GList *tf_terms;
+  GList *idf_terms;
+  /* Term Count grader */
+  GList *dict; 		/* dictionary loaded from xml */
+  GList *wordStat;	/* a word list of all words in the article and their occurrences */
+  GList *ImpWords;   /* important words - for the term count grader */
+} OtsArticle;
+OtsArticle *ots_new_article (void);
+void ots_free_article (OtsArticle *art);
+/*parser*/
+void ots_parse_file (FILE * stream, OtsArticle * Doc);	/*file input */
+void ots_parse_stream(const unsigned char *utf8 , size_t len ,OtsArticle *Doc); /*parse unicode stream*/
+OtsSentence *ots_append_line (OtsArticle * Doc);
+void ots_append_word (OtsSentence * aLine,unsigned const char *aWord);
+void ots_add_wordstat (OtsArticle * Doc,unsigned const char *wordString);
+/*dictionary*/
+gboolean ots_load_xml_dictionary (OtsArticle * Doc, const char *name);
+int ots_get_article_word_count (const OtsArticle * Doc);
+/*grader*/
+void ots_highlight_doc (OtsArticle * Doc, int percent);     /*example: 20%*/
+void ots_highlight_doc_lines (OtsArticle * Doc, int lines); /*example: 10 lines*/
+void ots_highlight_doc_words (OtsArticle * Doc, int words); /*example: 50 words*/
+void ots_grade_doc (OtsArticle * Doc);
+void  ots_free_OtsWordTF(OtsWordTF *obj); /*todo: put in .h file*/
+OtsWordTF*  ots_new_OtsWordTF(const char* word,const double idf);
+/*HTML output*/
+void ots_print_HTML (FILE * stream, const OtsArticle * Doc);
+unsigned char *ots_get_doc_HTML (const OtsArticle * Doc, size_t * out_len);
+/*TEXT output*/
+void ots_print_doc (FILE * stream, const OtsArticle * Doc);
+unsigned char *ots_get_doc_text (const OtsArticle * Doc, size_t * out_len);
+/*Plugin writing*/
+unsigned char* ots_get_line_text (const OtsSentence *aLine, gboolean only_if_selected, size_t *out_size);
+gboolean ots_is_line_selected(const OtsSentence *aLine);
+/*Stemming support*/
+OtsStemRule *new_stem_rule(void);
+void free_stem_rule (OtsStemRule *rule);
+unsigned char * ots_stem_strip (unsigned const char * aWord, const OtsStemRule *rule); /*returns newly allocated string with the root of the word*/
+unsigned char *ots_stem_format (unsigned const char *aWord, const OtsStemRule * rule); /*Remove leading spaces, comas, colons, etc. */
+/*Relations between texts*/
+/*Returns the number of topics that two blocks of text share*/
+int ots_text_relations(
+const unsigned char *text1,const unsigned char *lang_code1,
+const unsigned char *text2,const unsigned char *lang_code2,const int topic_num);
+/*For a given text, return the list of the topics*/
+char* ots_text_topics(const unsigned char *text,const unsigned char *lang_code,int topic_num);
+/*For a given text, return the list of the stemmed topics*/
+GList* ots_text_stem_list(const unsigned char *text,const unsigned char *lang_code,int topic_num);
+/*Gives a score on the relations between two lists of topics; similar to the inner product*/
+int ots_topic_list_score(const GList *topic_list1,const GList *topic_list2);
+G_END_DECLS
+#endif /* HAVE_LIBOTS_H */

data/ext/ots.c CHANGED

@@ -1,197 +1,176 @@
-#include <ruby.h>
+#include "ots.h"
+#include <sys/types.h>
+#include <dirent.h>
+#include <errno.h>
-/* ruby 1.9 only */
-#ifdef RUBY_VM
-  #include <ruby/encoding.h>
-#endif
+static VALUE mOTS, cArticle;
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+/* Free callback registered through Data_Wrap_Struct: releases the wrapped article, if any. */
+static void article_free(OtsArticle *article) {
+    if (!article)
+        return;
+    ots_free_article(article);
+}
-#include <libots-1/ots/libots.h>
+/* Allocator: creates an empty libots article and wraps it in an object of `klass`;
+ * article_free runs when that object is collected. */
+VALUE article_allocate(VALUE klass) {
+    OtsArticle *fresh;
+
+    fresh = ots_new_article();
+    return Data_Wrap_Struct(klass, 0, article_free, fresh);
+}
-#define ID_CONST_GET rb_intern("const_get")
-#define CONST_GET(scope, constant) (rb_funcall(scope, ID_CONST_GET, 1, rb_str_new2(constant)))
+/* Unwraps the OtsArticle stored in `self`; raises ArgumentError when the pointer is NULL. */
+OtsArticle* article_handle(VALUE self) {
+    OtsArticle *handle = 0;
+
+    Data_Get_Struct(self, OtsArticle, handle);
+    if (handle)
+        return handle;
+    rb_raise(rb_eArgError, "invalid OTS::Article instance");
+    return handle;
+}
-static VALUE rb_cOTS;
-static VALUE eLoadError;
-static VALUE eRuntimeError;
-static VALUE eArgumentError;
+/* Loads the XML dictionary `name` into `article`; raises LoadError when libots reports failure. */
+void article_load_dictionary(OtsArticle *article, char *name) {
+  if (ots_load_xml_dictionary(article, name))
+    return;
+  rb_raise(rb_eLoadError, "Could not find dictionary file: %s", name);
+}
-typedef struct {
-  gchar *word;    /* the word */
-  gchar *stem;    /*stem of the word*/
-  gint occ;     /* how many times have we seen this word in the text? */
-} OtsWordEntery;
+VALUE article_initialize(int argc, VALUE *argv, VALUE self) { /* Article#initialize(text, dictionary = nil) */
+    VALUE text, dictionary;
+    OtsArticle *article = article_handle(self);
+    rb_scan_args(argc, argv, "11", &text, &dictionary); /* "11": one required and one optional argument */
-/* helpers */
+    if (TYPE(text) != T_STRING)
+        rb_raise(rb_eArgError, "invalid +text+");
-OtsArticle* get_article(VALUE self, gboolean error_on_missing) {
-  VALUE rb_article_object = rb_iv_get(self, "@article");
-  if (rb_article_object == Qnil) {
-    if (error_on_missing)
-      rb_raise(eRuntimeError, "libots document not initialized properly. Did you forget to parse content ?");
+    if (NIL_P(dictionary))
+        article_load_dictionary(article, "en"); /* default dictionary when none is given */
     else
-      return NULL;
-  }
-  return (OtsArticle *)DATA_PTR(rb_article_object);
-}
+        article_load_dictionary(article, CSTRING(dictionary)); /* CSTRING is not defined in this file; presumably it comes from ots.h */
+    ots_parse_stream(RSTRING_PTR(text), RSTRING_LEN(text), article);
+    ots_grade_doc(article); /* score the parsed sentences so they can be highlighted later */
-void rb_ots_free_article(VALUE self) {
-  OtsArticle *article = DATA_PTR(rb_iv_get(self, "@article"));
-  ots_free_article(article);
+    rb_iv_set(self, "@encoding", (VALUE)rb_enc_get(text)); /* NOTE(review): stores a raw rb_encoding* as a VALUE; the readers cast it back. Confirm this is safe with the GC, or store an Encoding object instead */
+    return self;
 }
-VALUE rb_string(char *utf8) {
-  VALUE str = rb_str_new(utf8, strlen(utf8));
-  /* ruby 1.9 only - force bytestream to utf8 */
-  #ifdef RUBY_VM
-    rb_enc_associate(str, rb_to_encoding(rb_str_new2("UTF-8")));
-    ENC_CODERANGE_CLEAR(str);
-  #endif
+/*
+ * Collects the currently selected sentences of `article` into a Ruby
+ * array of hashes ({sentence: String, score: Integer}); the strings
+ * are tagged with `encoding`. Each reported sentence is deselected
+ * again, so a later summarize call starts from a clean state.
+ */
+VALUE article_summary(OtsArticle *article, rb_encoding *encoding) {
+  OtsSentence *sentence;
-  return str;
-}
+  GList *line_ptr  = article->lines;
+  VALUE summary    = rb_ary_new();
-/* ruby libots methods/wrappers */
+  while (line_ptr != NULL) {
+    sentence = (OtsSentence *)line_ptr->data;
-VALUE rb_ots_init(VALUE self) {
-  OtsArticle *article = get_article(self, FALSE);
-  VALUE dict = Qnil;
-  if (article != NULL) {
-    dict = rb_iv_get(self, "@dict");
-    ots_free_article(article);
-  }
-  article = ots_new_article();
-  rb_iv_set(self, "@article", Data_Wrap_Struct(rb_cObject, 0, 0, article));
-  rb_iv_set(self, "@dict", dict);
-  return self;
-}
+    if (sentence->selected) {
+      size_t size;
+      unsigned char* content = ots_get_line_text(sentence, TRUE, &size);
-VALUE rb_ots_load_dictionary(VALUE self, VALUE dict) {
-  char *dict_cstr = "en";
-  if (dict != Qnil) dict_cstr = RSTRING_PTR(dict);
+      VALUE line = rb_hash_new();
+      rb_hash_aset(line, ID2SYM(rb_intern("sentence")), rb_enc_str_new((char *)content, size, encoding));
+      rb_hash_aset(line, ID2SYM(rb_intern("score")),    LONG2FIX(sentence->score));
+      rb_ary_push(summary, line);
+      // rb_enc_str_new copied the bytes; the buffer from ots_get_line_text
+      // is assumed to be caller-owned (as with ots_get_line_HTML), so release it.
+      g_free(content);
-  OtsArticle *article = get_article(self, FALSE);
-  if (article == NULL) {
-    rb_ots_init(self);
-    article = get_article(self, TRUE);
-  }
+      // reset this so subsequent calls work right.
+      sentence->selected = FALSE;
+    }
-  if (!ots_load_xml_dictionary(article, (unsigned const char *)dict_cstr)) {
-    rb_ots_free_article(self);
-    rb_raise(eLoadError, "Could not find dictionary file: %s", dict_cstr);
+    line_ptr = g_list_next(line_ptr);
   }
-  rb_iv_set(self, "@dict", dict);
-  return Qtrue;
+  return summary;
 }
-VALUE rb_ots_parse_string(VALUE self, VALUE string) {
-  const unsigned char *string_cstr = (const unsigned char *)RSTRING_PTR(string);
-  size_t string_len = RSTRING_LEN(string);
+VALUE article_summarize(VALUE self, VALUE options) { /* Article#summarize(lines: n) or Article#summarize(percent: n) */
+    VALUE lines, percent;
+    OtsArticle *article = article_handle(self);
-  rb_ots_init(self);
-  rb_ots_load_dictionary(self, rb_iv_get(self, "@dict"));
-  OtsArticle *article = get_article(self, TRUE);
-  ots_parse_stream(string_cstr, string_len, article);
-  ots_grade_doc(article);
-  return Qtrue;
-}
+    if (TYPE(options) != T_HASH)
+        rb_raise(rb_eArgError, "expect an options hash");
-VALUE rb_ots_highlight_lines(VALUE self, int lines) {
-  OtsArticle *article = get_article(self, TRUE);
-  ots_highlight_doc_lines(article, lines);
-  return Qtrue;
-}
+    lines   = rb_hash_aref(options, ID2SYM(rb_intern("lines")));
+    percent = rb_hash_aref(options, ID2SYM(rb_intern("percent")));
+    if (NIL_P(lines) && NIL_P(percent))
+        rb_raise(rb_eArgError, "expect +lines+ or +percent+ to be provided");
-VALUE rb_ots_highlight_percent(VALUE self, int percent) {
-  OtsArticle *article = get_article(self, TRUE);
-  ots_highlight_doc(article, percent);
-  return Qtrue;
+    if (lines != Qnil) /* when both keys are given, :lines wins */
+        ots_highlight_doc_lines(article, NUM2INT(lines));
+    else
+        ots_highlight_doc(article, NUM2INT(percent));
+    return article_summary(article, (rb_encoding *)rb_iv_get(self, "@encoding")); /* @encoding holds the raw pointer stored by article_initialize */
 }
-VALUE rb_ots_article_title(VALUE self) {
-  OtsArticle *article = get_article(self, TRUE);
-  if (article->title != NULL)
-    return rb_string(article->title);
-  else
-    return Qnil;
+/* Article#title: the article's title as a String in the stored encoding, or nil when there is none. */
+VALUE article_title(VALUE self) {
+    OtsArticle *article = article_handle(self);
+    rb_encoding *encoding;
+
+    if (!article->title)
+        return Qnil;
+    encoding = (rb_encoding*)rb_iv_get(self, "@encoding");
+    return rb_enc_str_new2(article->title, encoding);
 }
-VALUE rb_ots_article_keywords(VALUE self) {
-  OtsArticle *article = get_article(self, TRUE);
-  GList* words = article->ImpWords;
-  VALUE iwords = rb_ary_new();
-  while (words != NULL) {
-    OtsWordEntery *data = (OtsWordEntery *)words->data;
-    if (data != NULL && strlen(data->word) > 0)
-      rb_ary_push(iwords, rb_string(data->word));
-    words = words->next;
-  }
+typedef struct { /* element type that article_keywords reads from OtsArticle.ImpWords. NOTE(review): presumably mirrors a private libots struct - confirm the layout matches */
+  gchar *word;    /* the word */
+  gchar *stem;    /* stem of the word */
+  gint occ;     /* how many times have we seen this word in the text? */
+} OtsWordEntry;
-  return iwords;
-}
-VALUE rb_ots_get_highlighted_lines(VALUE self) {
-  OtsArticle *article = get_article(self, TRUE);
-  OtsSentence *sentence;
-  GList *curr_line = article->lines;
-  VALUE hlt_lines = rb_ary_new();
+VALUE article_keywords(VALUE self) { /* Article#keywords: array of the non-empty words in ImpWords */
+    OtsArticle *article = article_handle(self);
+    rb_encoding *encoding = (rb_encoding*)rb_iv_get(self, "@encoding"); /* raw pointer stored by article_initialize */
-  while (curr_line != NULL) {
-    sentence = (OtsSentence *)curr_line->data;
-    if (sentence->selected) {
-      size_t len;
-      unsigned char* content = ots_get_line_text(sentence, TRUE, &len);
-      VALUE hlt_line = rb_hash_new();
-      rb_hash_aset(hlt_line, ID2SYM(rb_intern("sentence")), rb_string((char *)content));
-      rb_hash_aset(hlt_line, ID2SYM(rb_intern("score")), LONG2FIX(sentence->score));
-      rb_ary_push(hlt_lines, hlt_line);
+    VALUE words     = rb_ary_new();
+    GList* word_ptr = article->ImpWords;
+    while (word_ptr) {
+        OtsWordEntry *data = (OtsWordEntry *)word_ptr->data;
+        if (data && strlen(data->word) > 0) /* NOTE(review): data->word itself is not checked for NULL */
+            rb_ary_push(words, rb_enc_str_new2(data->word, encoding));
+        word_ptr = word_ptr->next;
     }
-    curr_line = g_list_next(curr_line);
-  }
-  return hlt_lines;
+    return words;
 }
-VALUE rb_summarize(VALUE self, VALUE options) {
-  VALUE lines = rb_hash_aref(options, ID2SYM(rb_intern("lines")));
-  VALUE percent = rb_hash_aref(options, ID2SYM(rb_intern("percent")));
+/* OTS.parse: allocates an OTS::Article and initializes it with the given arguments. */
+VALUE ots_parse(int argc, VALUE *argv, VALUE self) {
+    VALUE parsed;
+
+    parsed = article_allocate(cArticle);
+    article_initialize(argc, argv, parsed);
+    return parsed;
+}
-  if (lines != Qnil && percent != Qnil) {
-    rb_ots_free_article(self);
-    rb_raise(eArgumentError, "Cannot summarize on :lines & :percent, only one is allowed");
-  }
-  else if (lines == Qnil && percent == Qnil) {
-    rb_ots_free_article(self);
-    rb_raise(eArgumentError, "Need either :lines or :percent to summarize");
-  }
+/*
+ * OTS.dictionaries: names of the dictionaries found in DICTIONARY_DIR.
+ *
+ * Every directory entry whose name ends in ".xml" is reported with
+ * that suffix removed. Raises IOError when the directory cannot be
+ * opened.
+ */
+VALUE ots_dictionaries(VALUE self) {
+    static const char suffix[] = ".xml";
+    const size_t suffix_len = sizeof(suffix) - 1;
+    DIR *dir;
+    struct dirent *entry;
+    VALUE dictionaries = rb_ary_new();
+
+    dir = opendir(DICTIONARY_DIR);
+    if (!dir)
+        rb_raise(rb_eIOError, "unable to open dictionary directory: %s", strerror(errno));
+
+    while ((entry = readdir(dir))) {
+        // entry->d_type is not portable, so only the file name is inspected.
+        size_t name_len = strlen(entry->d_name);
+
+        // The name must END in ".xml": a match elsewhere (e.g. "en.xml.bak")
+        // would otherwise lose its last four characters and be listed.
+        if (name_len > suffix_len && strcmp(entry->d_name + name_len - suffix_len, suffix) == 0)
+            rb_ary_push(dictionaries, rb_str_new(entry->d_name, name_len - suffix_len));
+    }
-  if (lines != Qnil)
-    rb_ots_highlight_lines(self, FIX2INT(lines));
-  else if (percent != Qnil)
-    rb_ots_highlight_percent(self, FIX2INT(percent));
-  return rb_ots_get_highlighted_lines(self);
+    closedir(dir);
+    return dictionaries;
 }
 /* init */
 void Init_ots(void) {
-    eLoadError     = CONST_GET(rb_mKernel, "LoadError");
-    eRuntimeError  = CONST_GET(rb_mKernel, "RuntimeError");
-    eArgumentError = CONST_GET(rb_mKernel, "ArgumentError");
-    rb_cOTS = rb_define_class("OTS", rb_cObject);
-    rb_define_method(rb_cOTS, "load_dictionary", rb_ots_load_dictionary, 1);
-    rb_define_method(rb_cOTS, "parse", rb_ots_parse_string, 1);
-    rb_define_method(rb_cOTS, "highlight_lines", rb_ots_highlight_lines, 1);
-    rb_define_method(rb_cOTS, "highlight_percent", rb_ots_highlight_percent, 1);
-    rb_define_method(rb_cOTS, "highlighted_content", rb_ots_get_highlighted_lines, 0);
-    rb_define_method(rb_cOTS, "summarize", rb_summarize, 1);
-    rb_define_method(rb_cOTS, "title", rb_ots_article_title, 0);
-    rb_define_method(rb_cOTS, "keywords", rb_ots_article_keywords, 0);
+    mOTS      = rb_define_module("OTS"); /* OTS is now a module; the summarizer lives in OTS::Article */
+    cArticle  = rb_define_class_under(mOTS, "Article", rb_cObject);
+    rb_define_method(cArticle, "initialize", RUBY_METHOD_FUNC(article_initialize), -1); /* -1: arguments arrive as argc/argv */
+    rb_define_method(cArticle, "summarize",  RUBY_METHOD_FUNC(article_summarize),   1);
+    rb_define_method(cArticle, "title",      RUBY_METHOD_FUNC(article_title),       0);
+    rb_define_method(cArticle, "keywords",   RUBY_METHOD_FUNC(article_keywords),    0);
+    rb_define_module_function(mOTS, "parse",        RUBY_METHOD_FUNC(ots_parse),       -1); /* takes the same arguments as Article#initialize */
+    rb_define_module_function(mOTS, "dictionaries", RUBY_METHOD_FUNC(ots_dictionaries), 0);
+    rb_define_alloc_func(cArticle, article_allocate); /* each Article wraps its own OtsArticle */
+    rb_define_const(mOTS, "VERSION", rb_str_new2(RUBY_OTS_VERSION)); /* RUBY_OTS_VERSION is not defined in this file; presumably it comes from ots.h */
 }